Skip to content

Commit 7b2cdc2

Browse files
authored
chore: Update rss recipe to use handlers again, pull out and test RSS parsing (#1931)
1 parent 73c22a2 commit 7b2cdc2

File tree

3 files changed

+148
-124
lines changed

3 files changed

+148
-124
lines changed

recipes/dom-parser.test.ts renamed to recipes/rss-utils.test.ts

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { DOMParser } from "./dom-parser.ts";
2-
import { assert } from "@std/assert";
2+
import { assert, assertObjectMatch } from "@std/assert";
3+
import { FeedItem, parseRSSFeed } from "./rss-utils.ts";
34

45
const xml = `<?xml version="1.0" encoding="UTF-8"?>
56
<feed xml:lang="en-US" xmlns="http://www.w3.org/2005/Atom">
@@ -38,7 +39,7 @@ const xml = `<?xml version="1.0" encoding="UTF-8"?>
3839
</feed>
3940
`;
4041

41-
Deno.test("DOMParser parsers XML", () => {
42+
Deno.test("DOMParser/XML", () => {
4243
const parser = new DOMParser();
4344
const doc = parser.parseFromString(xml, "text/xml");
4445
const entries = doc.getElementsByTagName("entry");
@@ -60,3 +61,24 @@ Deno.test("DOMParser parsers XML", () => {
6061
"Get textContent",
6162
);
6263
});
64+
65+
Deno.test("parseRSSFeed()", () => {
66+
const entries = parseRSSFeed(
67+
xml,
68+
5,
69+
new Set(["tag:www.githubstatus.com,2005:Incident/26833707"]),
70+
);
71+
assert(
72+
entries.length === 2,
73+
"Expecting 2 entries after filtering one existing",
74+
);
75+
assertObjectMatch(entries[0], {
76+
author: "",
77+
id: "tag:www.githubstatus.com,2005:Incident/26837586",
78+
pubDate: "2025-10-21T17:39:34Z",
79+
title: "Disruption with some GitHub services",
80+
link: "https://www.githubstatus.com/incidents/v61nk2fpysnq",
81+
content:
82+
"&lt;p&gt;&lt;small&gt;Oct &lt;var data-var='date'&gt;21&lt;/var&gt;, &lt;var data-var='time'&gt;17:39&lt;/var&gt; UTC&lt;/small&gt;&lt;br&gt;&lt;strong&gt;Resolved&lt;/strong&gt; - This incident has been resolved. Thank you for your patience and understanding as we addressed this issue. A detailed root cause analysis will be shared as soon as it is available.&lt;/p&gt;&lt;p&gt;&lt;small&gt;Oct &lt;var data-var='date'&gt;21&lt;/var&gt;, &lt;var data-var='time'&gt;17:18&lt;/var&gt; UTC&lt;/small&gt;&lt;br&gt;&lt;strong&gt;Update&lt;/strong&gt; - Mitigation continues, the impact is limited to Enterprise Cloud customers who have configured SAML at the organization level.&lt;/p&gt;&lt;p&gt;&lt;small&gt;Oct &lt;var data-var='date'&gt;21&lt;/var&gt;, &lt;var data-var='time'&gt;17:11&lt;/var&gt; UTC&lt;/small&gt;&lt;br&gt;&lt;strong&gt;Update&lt;/strong&gt; - We continuing to work on mitigation of this issue.&lt;/p&gt;&lt;p&gt;&lt;small&gt;Oct &lt;var data-var='date'&gt;21&lt;/var&gt;, &lt;var data-var='time'&gt;16:33&lt;/var&gt; UTC&lt;/small&gt;&lt;br&gt;&lt;strong&gt;Update&lt;/strong&gt; - We’ve identified the issue affecting some users with SAML/OIDC authentication and are actively working on mitigation. Some users may not be able to authenticate during this time.&lt;/p&gt;&lt;p&gt;&lt;small&gt;Oct &lt;var data-var='date'&gt;21&lt;/var&gt;, &lt;var data-var='time'&gt;16:03&lt;/var&gt; UTC&lt;/small&gt;&lt;br&gt;&lt;strong&gt;Update&lt;/strong&gt; - We're seeing issues for a small amount of customers with SAML/OIDC authentication for GitHub.com users. We are investigating.&lt;/p&gt;&lt;p&gt;&lt;small&gt;Oct &lt;var data-var='date'&gt;21&lt;/var&gt;, &lt;var data-var='time'&gt;16:00&lt;/var&gt; UTC&lt;/small&gt;&lt;br&gt;&lt;strong&gt;Investigating&lt;/strong&gt; - We are currently investigating this issue.&lt;/p&gt;",
83+
} as FeedItem);
84+
});

recipes/rss-utils.ts

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
import { DOMParser, type Element } from "./dom-parser.ts";
2+
3+
export type FeedItem = {
4+
id: string;
5+
title: string;
6+
link: string;
7+
description: string;
8+
pubDate: string;
9+
author: string;
10+
content: string;
11+
};
12+
13+
/**
 * Parses an RSS or Atom document and returns the items that are not already
 * known to the caller.
 *
 * The document is treated as Atom when it contains a `<feed>` element;
 * otherwise `<item>` elements are read as RSS.
 *
 * @param textXML - Raw XML text of the feed.
 * @param maxResults - Number of leading entries of the document to inspect.
 *   Entries skipped because they are already known still count towards this
 *   limit, so the result may contain fewer than `maxResults` items.
 * @param existingIds - Ids that must be left out of the result. The set is
 *   only read, never modified.
 * @returns Newly seen items in document order, each id appearing at most once.
 */
export function parseRSSFeed(
  textXML: string,
  maxResults: number = 100,
  existingIds: Set<string>,
): FeedItem[] {
  const parser = new DOMParser();
  const doc = parser.parseFromString(textXML, "text/xml");

  // Trimmed text of the first descendant named `tagName`, or "" when absent.
  const getTextContent = (element: Element | null, tagName: string) => {
    const el = element?.getElementsByTagName(tagName)[0];
    return el?.textContent?.trim() || "";
  };

  // Atom entries may carry several <link> elements (self, enclosure, ...).
  // Prefer the one whose rel is "alternate" (a missing rel means "alternate");
  // otherwise fall back to the first link that has an href at all.
  const getAtomLink = (entry: Element) => {
    const links = entry.getElementsByTagName("link");
    let firstHref = "";
    for (let i = 0; i < links.length; i++) {
      const href = links[i].getAttribute("href") || "";
      if (!href) continue;
      const rel = links[i].getAttribute("rel") || "alternate";
      if (rel === "alternate") return href;
      if (!firstHref) firstHref = href;
    }
    return firstHref;
  };

  // Id used when the feed supplies none. It has to be stable across calls,
  // otherwise the same entry can never match `existingIds` on the next poll.
  // A random id is only the last resort for entries with nothing to key on.
  const fallbackId = (link: string, title: string, pubDate: string) => {
    if (link) return link;
    if (title || pubDate) return `${title}|${pubDate}`;
    return Math.random().toString(36);
  };

  const retrievedItems: FeedItem[] = [];
  // Ids emitted during this call, so duplicates inside one document are
  // dropped without touching the caller's set.
  const seenIds = new Set<string>();
  const isNew = (id: string) => !existingIds.has(id) && !seenIds.has(id);

  // Check if it's an Atom feed
  const isAtom = doc.getElementsByTagName("feed").length !== 0;

  if (isAtom) {
    const entries = doc.getElementsByTagName("entry");
    const scanCount = Math.min(entries.length, maxResults);

    for (let i = 0; i < scanCount; i++) {
      const entry = entries[i];

      const title = getTextContent(entry, "title");
      const link = getAtomLink(entry);
      // Atom uses <published> or <updated> for the date.
      const pubDate = getTextContent(entry, "published") ||
        getTextContent(entry, "updated");

      // In Atom, id is mandatory; the fallback only covers malformed feeds.
      const id = getTextContent(entry, "id") ||
        fallbackId(link, title, pubDate);
      if (!isNew(id)) {
        continue;
      }
      seenIds.add(id);

      const summary = getTextContent(entry, "summary");

      // Author is nested as <author><name>Author</name></author>.
      const authorEl = entry.getElementsByTagName("author")[0];
      const author = authorEl ? getTextContent(authorEl, "name") : "";

      retrievedItems.push({
        id,
        title,
        link,
        description: summary,
        pubDate,
        author,
        // Prefer the full <content>, fall back to the summary.
        content: getTextContent(entry, "content") || summary,
      });
    }
  } else {
    const rssItems = doc.getElementsByTagName("item");
    const scanCount = Math.min(rssItems.length, maxResults);

    for (let i = 0; i < scanCount; i++) {
      const item = rssItems[i];

      const title = getTextContent(item, "title");
      const link = getTextContent(item, "link");
      const pubDate = getTextContent(item, "pubDate");

      const id = getTextContent(item, "guid") ||
        fallbackId(link, title, pubDate);
      if (!isNew(id)) {
        continue;
      }
      seenIds.add(id);

      const description = getTextContent(item, "description");

      retrievedItems.push({
        id,
        title,
        link,
        description,
        pubDate,
        // Many feeds use the Dublin Core <dc:creator> instead of <author>.
        author: getTextContent(item, "author") ||
          getTextContent(item, "dc:creator"),
        content: getTextContent(item, "content:encoded") || description,
      });
    }
  }

  return retrievedItems;
}

recipes/rss.tsx

Lines changed: 11 additions & 122 deletions
Original file line numberDiff line numberDiff line change
@@ -1,152 +1,41 @@
11
/// <cts-enable />
22
import {
3+
Cell,
34
cell,
45
Default,
56
derive,
67
fetchData,
7-
lift,
8+
handler,
89
NAME,
910
recipe,
1011
str,
1112
UI,
1213
} from "commontools";
13-
import { DOMParser, type Element } from "./dom-parser.ts";
14+
import { type FeedItem, parseRSSFeed } from "./rss-utils.ts";
1415

1516
interface Settings {
1617
feedUrl: Default<string, "">;
1718
limit: Default<number, 100>;
1819
}
1920

20-
type FeedItem = {
21-
id: string;
22-
title: string;
23-
link: string;
24-
description: string;
25-
pubDate: string;
26-
author: string;
27-
content: string;
28-
};
29-
30-
function parseRSSFeed(
31-
textXML: string,
32-
maxResults: number = 100,
33-
existingIds: Set<string>,
34-
): FeedItem[] {
35-
const parser = new DOMParser();
36-
const doc = parser.parseFromString(textXML, "text/xml");
37-
// Helper function to get text content from an element
38-
const getTextContent = (element: Element | null, tagName: string) => {
39-
const el = element?.getElementsByTagName(tagName)[0];
40-
return el?.textContent?.trim() || "";
41-
};
42-
43-
// Helper function to get attribute value
44-
const getAttributeValue = (
45-
element: Element | null,
46-
tagName: string,
47-
attrName: string,
48-
) => {
49-
const el = element?.getElementsByTagName(tagName)[0];
50-
return el?.getAttribute(attrName) || "";
51-
};
52-
53-
const retrievedItems: FeedItem[] = [];
54-
55-
// Check if it's an Atom feed
56-
const isAtom = doc.getElementsByTagName("feed").length !== 0;
57-
58-
if (isAtom) {
59-
// Parse Atom feed
60-
const entries = doc.getElementsByTagName("entry");
61-
62-
for (let i = 0; i < Math.min(entries.length, maxResults); i++) {
63-
const entry = entries[i];
64-
65-
// In Atom, id is mandatory
66-
const id = getTextContent(entry, "id") || Math.random().toString(36);
67-
68-
// Skip if we already have this item
69-
if (existingIds.has(id)) {
70-
continue;
71-
}
72-
73-
// Parse link - in Atom links are elements with href attributes
74-
const link = getAttributeValue(entry, "link", "href");
75-
76-
// For content, check content tag first, then summary
77-
const content = getTextContent(entry, "content") ||
78-
getTextContent(entry, "summary");
79-
80-
// For author, it might be nested as <author><name>Author</name></author>
81-
let author = "";
82-
const authorEl = entry.getElementsByTagName("author")[0];
83-
if (authorEl) {
84-
author = getTextContent(authorEl, "name");
85-
}
86-
87-
// For pubDate, Atom uses <published> or <updated>
88-
const pubDate = getTextContent(entry, "published") ||
89-
getTextContent(entry, "updated");
90-
91-
retrievedItems.push({
92-
id,
93-
title: getTextContent(entry, "title"),
94-
link,
95-
description: getTextContent(entry, "summary"),
96-
pubDate,
97-
author,
98-
content,
99-
});
100-
}
101-
} else {
102-
// Parse RSS feed
103-
const rssItems = doc.getElementsByTagName("item");
104-
105-
for (let i = 0; i < Math.min(rssItems.length, maxResults); i++) {
106-
const item = rssItems[i];
107-
108-
const id = getTextContent(item, "guid") ||
109-
getTextContent(item, "link") ||
110-
Math.random().toString(36);
111-
112-
if (existingIds.has(id)) {
113-
continue;
114-
}
115-
116-
retrievedItems.push({
117-
id,
118-
title: getTextContent(item, "title"),
119-
link: getTextContent(item, "link"),
120-
description: getTextContent(item, "description"),
121-
pubDate: getTextContent(item, "pubDate"),
122-
author: getTextContent(item, "author"),
123-
content: getTextContent(item, "content:encoded") ||
124-
getTextContent(item, "description"),
125-
});
126-
}
127-
}
128-
129-
return retrievedItems;
130-
}
131-
132-
const feedUpdater = lift<{
133-
items: FeedItem[];
21+
const feedUpdater = handler<never, {
22+
items: Cell<FeedItem[]>;
13423
settings: Settings;
135-
}>(({ settings, items }) => {
24+
}>((_, { items, settings }) => {
13625
if (!settings.feedUrl) {
13726
console.warn("no feed URL provided");
13827
return;
13928
}
14029

14130
const query = fetchData({ url: settings.feedUrl, mode: "text" });
14231
return derive(
143-
{ items, result: query.result, limit: settings.limit },
144-
({ result, limit, items }) => {
145-
if (!result || typeof result !== "string") return;
32+
{ items, query, limit: settings.limit },
33+
({ query, limit, items }) => {
34+
if (!query.result || typeof query.result !== "string") return;
14635
const newEntries = parseRSSFeed(
147-
result as string,
36+
query.result as string,
14837
limit,
149-
new Set(items.map((item) => item.id)),
38+
new Set(items.get().map((item) => item.id)),
15039
);
15140
items.push(...newEntries);
15241
},

0 commit comments

Comments
 (0)