11: Implement Linux RSS/Atom feed parser

This commit is contained in:
2026-03-30 09:38:06 -04:00
parent d84b8ff4e8
commit bbc1363bcc
1646 changed files with 46543 additions and 669 deletions

View File

@@ -16,6 +16,7 @@ gio_dep = dependency('gio-2.0', version: '>= 2.58')
json_dep = dependency('json-glib-1.0', version: '>= 1.4')
sqlite_dep = dependency('sqlite3', version: '>= 3.0')
gobject_dep = dependency('gobject-2.0', version: '>= 2.58')
xml_dep = dependency('libxml-2.0', version: '>= 2.0')
# Source files
models = files(
@@ -37,6 +38,15 @@ database = files(
'src/database/search-history-store.vala',
)
# Parser files
# Vala sources for the feed parser: the feed-type enum, the parse result type,
# the RSS and Atom parsers, and the front-end feed parser. This list is the
# source set passed to the 'rssuper-parser' library target.
parser = files(
'src/parser/feed-type.vala',
'src/parser/parse-result.vala',
'src/parser/rss-parser.vala',
'src/parser/atom-parser.vala',
'src/parser/feed-parser.vala',
)
# Main library
models_lib = library('rssuper-models', models,
dependencies: [glib_dep, gio_dep, json_dep],
@@ -51,14 +61,32 @@ database_lib = library('rssuper-database', database,
vala_args: ['--vapidir', 'src/database', '--pkg', 'sqlite3']
)
# Parser library
# Builds the parser sources into 'rssuper-parser'. It depends on GLib, GIO,
# json-glib and libxml-2.0 and links against the models library. The extra
# vala_args add src/parser to valac's VAPI search path and request the
# libxml-2.0 package. install: false keeps the library out of the install step.
parser_lib = library('rssuper-parser', parser,
dependencies: [glib_dep, gio_dep, json_dep, xml_dep],
link_with: [models_lib],
install: false,
vala_args: ['--vapidir', 'src/parser', '--pkg', 'libxml-2.0']
)
# Test executable
test_exe = executable('database-tests',
'src/tests/database-tests.vala',
dependencies: [glib_dep, gio_dep, json_dep, sqlite_dep, gobject_dep],
link_with: [models_lib, database_lib],
vala_args: ['--vapidir', '.', '--pkg', 'sqlite3'],
dependencies: [glib_dep, gio_dep, json_dep, sqlite_dep, gobject_dep, xml_dep],
link_with: [models_lib, database_lib, parser_lib],
vala_args: ['--vapidir', '.', '--pkg', 'sqlite3', '--pkg', 'libxml-2.0'],
install: false
)
# Test definition
# Parser test executable
# Stand-alone test binary built from src/tests/parser-tests.vala. It links the
# models and parser libraries and is registered with meson's test runner as
# 'parser tests'. It is not installed.
parser_test_exe = executable('parser-tests',
'src/tests/parser-tests.vala',
dependencies: [glib_dep, gio_dep, json_dep, xml_dep],
link_with: [models_lib, parser_lib],
vala_args: ['--vapidir', '.', '--pkg', 'libxml-2.0'],
install: false
)
# Test definitions
test('database tests', test_exe)
test('parser tests', parser_test_exe)

View File

@@ -0,0 +1,245 @@
/*
* AtomParser.vala
*
* Atom 1.0 feed parser
*/
/**
 * Parser for Atom 1.0 documents.
 *
 * Walks the libxml2 tree of a feed document and fills in a Feed with one
 * FeedItem per entry element. An instance may be reused: all per-document
 * state is cleared at the start of every parse() call.
 */
public class RSSuper.AtomParser : Object {
    private const string ATOM_NAMESPACE = "http://www.w3.org/2005/Atom";

    private string feed_url;
    private Feed? current_feed;
    private FeedItem? current_item;
    private string[] current_categories;
    private bool in_feed;
    private bool in_entry;

    public AtomParser() {}

    /**
     * Parses an Atom document.
     *
     * @param xml_content the complete XML text of the feed
     * @param url the address the feed was fetched from; stored as the feed's raw_url
     * @return a successful result wrapping the Feed, or an error result when
     *         the text is not well-formed XML or the root is not an Atom feed
     */
    public ParseResult parse(string xml_content, string url) {
        this.feed_url = url;
        // Drop anything left over from a previous document so it cannot leak
        // into this one.
        reset_state();

        Xml.Doc* doc = Xml.Parser.parse_doc(xml_content);
        if (doc == null) {
            return ParseResult.error("Failed to parse XML document");
        }

        Xml.Node* root = doc->get_root_element();
        if (root == null) {
            delete doc;
            return ParseResult.error("No root element found");
        }

        // Owned copy, so it stays valid after the document is freed.
        string? name = root->name;
        if (name != "feed") {
            delete doc;
            return ParseResult.error("Not an Atom feed: root element is '%s'".printf(name ?? "unknown"));
        }

        // A missing namespace is tolerated; a different one is rejected.
        Xml.Ns* ns = root->ns;
        if (ns != null && ns->href != null && ns->href != ATOM_NAMESPACE) {
            delete doc;
            return ParseResult.error("Not an Atom 1.0 feed");
        }

        parse_element(root);
        delete doc;

        var feed = current_feed;
        reset_state();
        if (feed == null) {
            return ParseResult.error("No feed element found");
        }
        feed.raw_url = url;
        return ParseResult.success(feed);
    }

    /** Clears all per-document parsing state. */
    private void reset_state() {
        current_feed = null;
        current_item = null;
        current_categories = {};
        in_feed = false;
        in_entry = false;
    }

    /** Returns the stripped text content of //node//, or null when it has none. */
    private static string? read_text(Xml.Node* node) {
        string? text = node->get_content();
        return (text != null) ? text.strip() : null;
    }

    /**
     * Dispatches on the element name.
     *
     * Entries are nested inside the feed element, so in_feed is still true
     * while an entry is being read. Every case that can occur at both levels
     * therefore tests in_entry first.
     */
    private void parse_element(Xml.Node* node) {
        string? name = node->name;
        if (name == null) {
            return;
        }

        switch (name) {
            case "feed":
                in_feed = true;
                current_feed = new Feed();
                current_categories = {};
                iterate_children(node);
                in_feed = false;
                break;

            case "entry":
                in_entry = true;
                current_item = new FeedItem();
                current_categories = {};
                iterate_children(node);
                finish_entry();
                in_entry = false;
                break;

            case "title":
                var text = read_text(node);
                if (text != null) {
                    if (in_entry && current_item != null) {
                        current_item.title = text;
                    } else if (in_feed && current_feed != null) {
                        current_feed.title = text;
                    }
                }
                break;

            case "subtitle":
                var text = read_text(node);
                if (current_feed != null && text != null) {
                    current_feed.subtitle = text;
                }
                break;

            case "link":
                handle_link(node);
                break;

            case "summary":
                var text = read_text(node);
                if (in_entry && current_item != null && text != null) {
                    if (current_item.description == null) {
                        current_item.description = text;
                    }
                }
                break;

            case "content":
                var text = read_text(node);
                if (in_entry && current_item != null && text != null) {
                    if (current_item.content == null) {
                        current_item.content = text;
                    }
                    // Content doubles as the description when no summary was given.
                    if (current_item.description == null) {
                        current_item.description = text;
                    }
                }
                break;

            case "id":
                var text = read_text(node);
                if (in_entry && current_item != null && current_item.guid == null && text != null) {
                    current_item.guid = text;
                }
                break;

            case "updated":
                var text = read_text(node);
                if (text != null) {
                    // Entry first: otherwise every entry timestamp would
                    // overwrite the feed's and never reach the item.
                    if (in_entry && current_item != null) {
                        current_item.updated = text;
                    } else if (in_feed && current_feed != null) {
                        current_feed.updated = text;
                    }
                }
                break;

            case "published":
                var text = read_text(node);
                if (in_entry && current_item != null && text != null) {
                    current_item.published = text;
                }
                break;

            case "author":
                if (in_entry && current_item != null) {
                    handle_author(node);
                }
                break;

            case "generator":
                var text = read_text(node);
                if (current_feed != null && text != null) {
                    current_feed.generator = text;
                }
                break;

            case "category":
                var term = node->get_prop("term");
                // Only entry-level categories belong to an item.
                if (in_entry && current_item != null && term != null) {
                    current_categories += term;
                    current_item.categories = current_categories;
                }
                break;
        }
    }

    /**
     * Stores a link element on the entry being read or, outside entries, on
     * the feed. Only the first alternate link (rel absent or "alternate") is
     * kept; inside an entry, rel="enclosure" fills the enclosure fields.
     */
    private void handle_link(Xml.Node* node) {
        var href = node->get_prop("href");
        if (href == null) {
            return;
        }
        var rel = node->get_prop("rel");
        bool is_alternate = (rel == null || rel == "alternate");

        if (in_entry) {
            if (current_item == null) {
                return;
            }
            if (is_alternate) {
                if (current_item.link == null) {
                    current_item.link = href;
                }
            } else if (rel == "enclosure") {
                current_item.enclosure_url = href;
                current_item.enclosure_type = node->get_prop("type");
                current_item.enclosure_length = node->get_prop("length");
            }
        } else if (in_feed && current_feed != null && is_alternate) {
            if (current_feed.link == null) {
                current_feed.link = href;
            }
        }
    }

    /** Takes the first name child of an entry's author element as the item author. */
    private void handle_author(Xml.Node* node) {
        for (Xml.Node* child = node->first_element_child(); child != null; child = child->next_element_sibling()) {
            string? child_name = child->name;
            if (child_name != "name") {
                continue;
            }
            var text = read_text(child);
            if (text != null && current_item.author == null) {
                current_item.author = text;
            }
        }
    }

    /**
     * Adds the finished entry to the feed and clears the per-entry state.
     * Entries whose title is the empty string are dropped. An empty id falls
     * back to guid, then link, then title.
     */
    private void finish_entry() {
        if (current_item != null && current_item.title != "") {
            if (current_item.id == "") {
                current_item.id = current_item.guid ?? current_item.link ?? current_item.title;
            }
            if (current_feed != null) {
                current_feed.add_item(current_item);
            }
        }
        // Nothing read after the closing tag may touch this entry any more.
        current_item = null;
        current_categories = {};
    }

    /** Runs parse_element() on every element child of //node//, in document order. */
    private void iterate_children(Xml.Node* node) {
        Xml.Node* child = node->first_element_child();
        while (child != null) {
            parse_element(child);
            child = child->next_element_sibling();
        }
    }
}

View File

@@ -0,0 +1,88 @@
/*
* FeedParser.vala
*
* Main feed parser that detects and handles both RSS and Atom feeds
*/
/**
 * Front end that picks the RSS or the Atom parser for a document.
 *
 * The format is taken from the document's root element or, optionally, from
 * a content type supplied by the caller.
 */
public class RSSuper.FeedParser : Object {
    private RSSParser rss_parser;
    private AtomParser atom_parser;

    public FeedParser() {
        this.atom_parser = new AtomParser();
        this.rss_parser = new RSSParser();
    }

    /**
     * Detects the feed format and parses the document with the matching parser.
     *
     * Anything that is not detected as Atom, including unknown documents, is
     * handed to the RSS parser.
     *
     * @param xml_content the complete XML text of the feed
     * @param url the address the feed was fetched from
     * @return the result produced by the selected parser
     */
    public ParseResult parse(string xml_content, string url) {
        if (detect_feed_type(xml_content) == FeedType.ATOM) {
            return atom_parser.parse(xml_content, url);
        }
        return rss_parser.parse(xml_content, url);
    }

    /**
     * Determines the feed format from the document's root element.
     *
     * @param xml_content the complete XML text of the feed
     * @return the detected type, or FeedType.UNKNOWN when the text cannot be
     *         parsed as XML or the root element is not recognised
     */
    public FeedType detect_feed_type(string xml_content) {
        Xml.Doc* doc = Xml.Parser.parse_doc(xml_content);
        if (doc == null) {
            return FeedType.UNKNOWN;
        }

        var detected = FeedType.UNKNOWN;
        Xml.Node* root = doc->get_root_element();
        if (root != null) {
            detected = classify_root(root);
        }
        delete doc;
        return detected;
    }

    /** Maps a root element to a feed type; see detect_feed_type(). */
    private static FeedType classify_root(Xml.Node* root) {
        string? root_name = root->name;

        if (root_name == "feed") {
            // A feed root counts as Atom when it has no namespace or the Atom one.
            Xml.Ns* ns = root->ns;
            bool is_atom = ns == null
                || ns->href == null
                || ns->href == "http://www.w3.org/2005/Atom";
            return is_atom ? FeedType.ATOM : FeedType.UNKNOWN;
        }

        if (root_name == "rss") {
            // Only an explicit "1.0" maps to RSS 1.0; every other version
            // value, or none at all, is treated as RSS 2.0.
            string? version = root->get_prop("version");
            return (version == "1.0") ? FeedType.RSS_1_0 : FeedType.RSS_2_0;
        }

        if (root_name == "RDF") {
            return FeedType.RSS_1_0;
        }

        return FeedType.UNKNOWN;
    }

    /**
     * Parses a document, preferring a caller-supplied content type over detection.
     *
     * @param xml_content the complete XML text of the feed
     * @param url the address the feed was fetched from
     * @param content_type value understood by FeedType.from_string(), or null;
     *        when it is null or maps to UNKNOWN the format is detected from
     *        the document instead
     * @return the result produced by the selected parser
     */
    public ParseResult parse_from_content_type(string xml_content, string url, string? content_type = null) {
        if (content_type == null) {
            return parse(xml_content, url);
        }

        switch (FeedType.from_string(content_type)) {
            case FeedType.ATOM:
                return atom_parser.parse(xml_content, url);
            case FeedType.RSS_1_0:
            case FeedType.RSS_2_0:
                return rss_parser.parse(xml_content, url);
            default:
                return parse(xml_content, url);
        }
    }
}

View File

@@ -0,0 +1,41 @@
/*
* FeedType.vala
*
* Enum for RSS/Atom feed types
*/
/**
 * Feed formats recognised by the parser.
 */
public enum RSSuper.FeedType {
    UNKNOWN,
    RSS_1_0,
    RSS_2_0,
    ATOM;

    /**
     * Maps a short format name ("rss", "atom", "rdf") or a MIME type to a feed type.
     *
     * Matching is case-insensitive. Surrounding whitespace and any media-type
     * parameters after the first ';' are ignored, so a raw header value such
     * as "application/atom+xml; charset=utf-8" is recognised.
     *
     * @param type format name or content type
     * @return the matching type, or UNKNOWN when nothing matches
     */
    public static FeedType from_string(string type) {
        // Keep only the media type itself; parameters such as charset are
        // irrelevant for choosing a parser.
        var semicolon = type.index_of_char(';');
        var media_type = (semicolon >= 0) ? type.substring(0, semicolon) : type;

        switch (media_type.strip().down()) {
            case "rss":
            case "application/rss+xml":
                return RSS_2_0;
            case "atom":
            case "application/atom+xml":
                return ATOM;
            case "rdf":
            case "application/rdf+xml":
                return RSS_1_0;
            default:
                return UNKNOWN;
        }
    }

    /**
     * Returns a human-readable name for the feed type.
     *
     * @return "RSS 1.0", "RSS 2.0", "Atom" or "Unknown"
     */
    public string to_string() {
        switch (this) {
            case RSS_1_0:
                return "RSS 1.0";
            case RSS_2_0:
                return "RSS 2.0";
            case ATOM:
                return "Atom";
            default:
                return "Unknown";
        }
    }
}

View File

@@ -0,0 +1,61 @@
/*
* ParseResult.vala
*
* Result type for feed parsing operations
*/
/**
 * Describes why a parse attempt failed.
 */
public class RSSuper.ParseError : Object {
    /** Numeric error code; 0 unless the creator supplied another value. */
    public int code { get; private set; }

    /** Human-readable description of the failure. */
    public string message { get; private set; }

    /**
     * Creates an error description.
     *
     * @param message human-readable description of the failure
     * @param code numeric error code, 0 by default
     */
    public ParseError(string message, int code = 0) {
        this.code = code;
        this.message = message;
    }
}
/**
 * Outcome of a parse operation: either a value or a ParseError.
 *
 * Instances are created through success() and error() only.
 */
public class RSSuper.ParseResult : Object {
    private Object? _value;
    private ParseError? _error;

    /** True when the result carries a value, false when it carries an error. */
    public bool ok { get; private set; }

    // Runtime type of the wrapped value, recorded by success().
    private Type _value_type;

    private ParseResult() {}

    /**
     * Creates a successful result.
     *
     * @param value the parsed object
     * @return a result whose ok is true
     */
    public static ParseResult success(Object value) {
        var result = new ParseResult();
        result.ok = true;
        result._value = value;
        result._value_type = value.get_type();
        return result;
    }

    /**
     * Creates a failed result.
     *
     * @param message human-readable description of the failure
     * @param code numeric error code, 0 by default
     * @return a result whose ok is false
     */
    public static ParseResult error(string message, int code = 0) {
        var result = new ParseResult();
        result.ok = false;
        result._error = new ParseError(message, code);
        return result;
    }

    /**
     * Returns the wrapped value without any type check.
     *
     * @return the value, or null for a failed result
     */
    public Object? get_value() {
        return this._value;
    }

    /**
     * Returns the wrapped value as //T//.
     *
     * @return the value, or null when the result failed or the value is not a T
     */
    public T? get_value_as<T>() {
        if (!ok) {
            return null;
        }
        if (_value is T) {
            return (T)_value;
        }
        return null;
    }

    /**
     * Returns the error of a failed result.
     *
     * @return the error, or null for a successful result
     */
    public ParseError? get_error() {
        return this._error;
    }

    /**
     * Tells whether the result is successful and its value is a //T//.
     *
     * Subtypes count as a match, so this agrees with get_value_as(): whenever
     * is_type<T>() is true, get_value_as<T>() returns a non-null value.
     *
     * @return true for a successful result holding a T or a subtype of T
     */
    public bool is_type<T>() {
        return ok && _value_type.is_a(typeof(T));
    }
}

View File

@@ -0,0 +1,348 @@
/*
* RSSParser.vala
*
* RSS 2.0 feed parser
*/
/**
 * Parser for RSS 2.0 (and 0.91/0.92) documents.
 *
 * Walks the libxml2 tree below the rss root and fills in a Feed with one
 * FeedItem per item element. An instance may be reused: all per-document
 * state is cleared at the start of every parse() call.
 *
 * Elements are matched by the name libxml2 reports. For elements in a
 * declared namespace that is the local name (content:encoded arrives as
 * "encoded"), so both the prefixed and the local spellings are listed.
 */
public class RSSuper.RSSParser : Object {
    private string feed_url;
    private Feed? current_feed;
    private FeedItem? current_item;
    private string[] current_categories;
    private bool in_item;
    private bool in_channel;
    private bool in_image;
    private bool in_entry;

    public RSSParser() {}

    /**
     * Parses an RSS document.
     *
     * @param xml_content the complete XML text of the feed
     * @param url the address the feed was fetched from; stored as the feed's raw_url
     * @return a successful result wrapping the Feed, or an error result when
     *         the text is not well-formed XML, the root is not rss, the
     *         version is unsupported, or there is no channel element
     */
    public ParseResult parse(string xml_content, string url) {
        this.feed_url = url;
        // Without this, a document lacking a channel would "succeed" with the
        // feed left over from the previous call.
        reset_state();

        Xml.Doc* doc = Xml.Parser.parse_doc(xml_content);
        if (doc == null) {
            return ParseResult.error("Failed to parse XML document");
        }

        Xml.Node* root = doc->get_root_element();
        if (root == null) {
            delete doc;
            return ParseResult.error("No root element found");
        }

        // Owned copy, so it stays valid after the document is freed.
        string? name = root->name;
        if (name != "rss") {
            delete doc;
            return ParseResult.error("Not an RSS feed: root element is '%s'".printf(name ?? "unknown"));
        }

        // A missing version attribute is tolerated.
        string? version = root->get_prop("version");
        if (version != null && version != "2.0" && version != "0.91" && version != "0.92") {
            delete doc;
            return ParseResult.error("Unsupported RSS version: %s".printf(version));
        }

        iterate_children(root);
        delete doc;

        var feed = current_feed;
        reset_state();
        if (feed == null) {
            return ParseResult.error("No channel element found");
        }
        feed.raw_url = url;
        return ParseResult.success(feed);
    }

    /** Clears all per-document parsing state. */
    private void reset_state() {
        current_feed = null;
        current_item = null;
        current_categories = {};
        in_item = false;
        in_channel = false;
        in_image = false;
        in_entry = false;
    }

    /** Returns the stripped text content of //node//, or null when it has none. */
    private static string? read_text(Xml.Node* node) {
        string? text = node->get_content();
        return (text != null) ? text.strip() : null;
    }

    /** Starts a fresh item with an empty category list. */
    private void begin_item() {
        current_item = new FeedItem();
        current_categories = {};
    }

    /**
     * Adds the finished item to the feed and clears the per-item state.
     * Items whose title is the empty string are dropped. An empty id falls
     * back to guid, then link, then title.
     */
    private void finish_item() {
        if (current_item != null && current_item.title != "") {
            if (current_item.id == "") {
                current_item.id = current_item.guid ?? current_item.link ?? current_item.title;
            }
            if (current_feed != null) {
                current_feed.add_item(current_item);
            }
        }
        // Channel-level elements that follow must not modify this item.
        current_item = null;
        current_categories = {};
    }

    /**
     * Dispatches on the element name.
     *
     * Items are nested inside the channel, so in_channel is still true while
     * an item is being read. Every case that can occur at both levels
     * therefore tests the item flags first.
     */
    private void parse_element(Xml.Node* node) {
        string? name = node->name;
        if (name == null) {
            return;
        }

        bool inside_item = in_item || in_entry;

        switch (name) {
            case "channel":
                in_channel = true;
                current_feed = new Feed();
                current_categories = {};
                iterate_children(node);
                in_channel = false;
                break;

            case "item":
                in_item = true;
                begin_item();
                iterate_children(node);
                finish_item();
                in_item = false;
                break;

            case "entry":
                in_entry = true;
                begin_item();
                iterate_children(node);
                finish_item();
                in_entry = false;
                break;

            case "image":
                in_image = true;
                iterate_children(node);
                in_image = false;
                break;

            case "textInput":
            case "textinput":
                // Its title, description and link describe a search box, not
                // the feed, so the subtree is skipped.
                break;

            case "title":
                var text = read_text(node);
                if (text != null) {
                    if (inside_item) {
                        if (current_item != null) {
                            current_item.title = text;
                        }
                    } else if (in_image) {
                        // The image title is only a fallback and never
                        // replaces the channel's own title.
                        if (current_feed != null
                                && (current_feed.title == null || current_feed.title == "")) {
                            current_feed.title = text;
                        }
                    } else if (in_channel) {
                        if (current_feed != null) {
                            current_feed.title = text;
                        }
                    }
                }
                break;

            case "link":
                var text = read_text(node);
                if (text != null) {
                    if (inside_item) {
                        if (current_item != null && current_item.link == null) {
                            current_item.link = text;
                        }
                    } else if (in_channel) {
                        if (current_feed != null && current_feed.link == null) {
                            current_feed.link = text;
                        }
                    }
                }
                break;

            case "description":
                var text = read_text(node);
                if (text != null) {
                    if (inside_item) {
                        if (current_item != null && current_item.description == null) {
                            current_item.description = text;
                        }
                    } else if (in_channel && !in_image) {
                        // An image description must not replace the channel's.
                        if (current_feed != null) {
                            current_feed.description = text;
                        }
                    }
                }
                break;

            case "subtitle":
                var text = read_text(node);
                // Item-level subtitles (e.g. itunes:subtitle) are not the feed's.
                if (!inside_item && current_feed != null && text != null) {
                    current_feed.subtitle = text;
                }
                break;

            case "language":
                var text = read_text(node);
                if (current_feed != null && text != null) {
                    current_feed.language = text;
                }
                break;

            case "lastBuildDate":
                var text = read_text(node);
                if (current_feed != null && text != null) {
                    current_feed.last_build_date = text;
                }
                break;

            case "updated":
                var text = read_text(node);
                if (text != null) {
                    if (inside_item && current_item != null) {
                        current_item.updated = text;
                    } else if (current_feed != null) {
                        current_feed.updated = text;
                    }
                }
                break;

            case "generator":
                var text = read_text(node);
                if (current_feed != null && text != null) {
                    current_feed.generator = text;
                }
                break;

            case "ttl":
                var text = read_text(node);
                if (current_feed != null && text != null) {
                    current_feed.ttl = int.parse(text);
                }
                break;

            case "author":
                var text = read_text(node);
                if (current_item != null && text != null) {
                    current_item.author = text;
                }
                break;

            case "dc:creator":
            case "creator":
            case "itunes:author":
                var text = read_text(node);
                if (current_item != null && current_item.author == null && text != null) {
                    current_item.author = text;
                }
                break;

            case "pubDate":
            case "published":
                var text = read_text(node);
                if (current_item != null && text != null) {
                    current_item.published = text;
                }
                break;

            case "guid":
            case "id":
                var text = read_text(node);
                if (current_item != null && current_item.guid == null && text != null) {
                    current_item.guid = text;
                }
                break;

            case "category":
                var text = read_text(node);
                if (current_item != null && text != null) {
                    current_categories += text;
                    current_item.categories = current_categories;
                }
                break;

            case "enclosure":
                var url = node->get_prop("url");
                if (current_item != null && url != null) {
                    current_item.enclosure_url = url;
                    current_item.enclosure_type = node->get_prop("type");
                    current_item.enclosure_length = node->get_prop("length");
                }
                break;

            case "content:encoded":
            case "encoded":
            case "content":
                var text = read_text(node);
                if (current_item != null && text != null) {
                    current_item.content = text;
                }
                break;

            case "itunes:summary":
            case "summary":
                var text = read_text(node);
                if (current_item != null && text != null) {
                    if (current_item.description == null) {
                        current_item.description = text;
                    }
                }
                break;

            case "url":
                // NOTE(review): this stores the image URL as the feed link
                // when no link has been seen yet. Kept as is; confirm whether
                // the Feed model has a dedicated image field.
                if (in_image && current_feed != null) {
                    var text = read_text(node);
                    if (current_feed.link == null && text != null) {
                        current_feed.link = text;
                    }
                }
                break;

            default:
                // Unknown wrappers are transparent: look at their children.
                iterate_children(node);
                break;
        }
    }

    /** Runs parse_element() on every element child of //node//, in document order. */
    private void iterate_children(Xml.Node* node) {
        Xml.Node* child = node->first_element_child();
        while (child != null) {
            parse_element(child);
            child = child->next_element_sibling();
        }
    }
}

View File

@@ -0,0 +1,347 @@
/*
* ParserTests.vala
*
* Unit tests for RSS/Atom feed parser.
*/
/**
 * Unit tests for the RSS/Atom feed parser.
 *
 * Each test stops at its first failed check. Failures are counted so that
 * main() can report them through the process exit status.
 */
public class RSSuper.ParserTests {
    private const string FEED_URL = "https://example.com/feed.xml";

    // Number of failed checks recorded so far.
    private int failures = 0;

    /**
     * Runs every test.
     *
     * @return 0 when all checks passed, 1 otherwise, so the test runner
     *         notices failures
     */
    public static int main(string[] args) {
        var tests = new ParserTests();
        tests.test_rss_parsing();
        tests.test_atom_parsing();
        tests.test_feed_type_detection();
        tests.test_malformed_xml();
        tests.test_itunes_namespace();
        tests.test_enclosures();

        if (tests.failures > 0) {
            printerr("%d parser check(s) failed\n", tests.failures);
            return 1;
        }
        print("All parser tests passed!\n");
        return 0;
    }

    /** Records a failed check and reports it on stderr. */
    private void fail(string message) {
        failures++;
        printerr("FAIL: %s\n", message);
    }

    /**
     * Compares a string value against its expected value.
     *
     * @return true when they match; otherwise records a failure and returns false
     */
    private bool expect_string(string what, string? actual, string expected) {
        if (actual == expected) {
            return true;
        }
        fail("Expected %s '%s', got '%s'".printf(what, expected, actual ?? "(null)"));
        return false;
    }

    /**
     * Checks the number of parsed items.
     *
     * @return true when it matches; otherwise records a failure and returns false
     */
    private bool expect_item_count(Feed feed, int expected) {
        if (feed.items.length == expected) {
            return true;
        }
        fail("Expected %d item%s, got %d".printf(expected, expected == 1 ? "" : "s", feed.items.length));
        return false;
    }

    /**
     * Extracts the Feed from a parse result.
     *
     * @param label format name used in the failure message
     * @return the feed, or null after recording a failure
     */
    private Feed? feed_from(ParseResult result, string label) {
        if (!result.ok) {
            fail("%s parsing failed: %s".printf(label, result.get_error().message));
            return null;
        }
        var feed = result.get_value() as Feed;
        if (feed == null) {
            fail("Expected Feed object");
        }
        return feed;
    }

    public void test_rss_parsing() {
        var rss_content = """<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>Test Feed</title>
<link>https://example.com</link>
<description>A test RSS feed</description>
<language>en</language>
<lastBuildDate>Mon, 01 Jan 2024 12:00:00 GMT</lastBuildDate>
<ttl>60</ttl>
<item>
<title>First Post</title>
<link>https://example.com/post1</link>
<description>This is the first post</description>
<pubDate>Mon, 01 Jan 2024 12:00:00 GMT</pubDate>
<guid>post-1</guid>
</item>
<item>
<title>Second Post</title>
<link>https://example.com/post2</link>
<description>This is the second post</description>
<pubDate>Tue, 02 Jan 2024 12:00:00 GMT</pubDate>
<guid>post-2</guid>
</item>
</channel>
</rss>""";
        var parser = new FeedParser();
        var result = parser.parse(rss_content, FEED_URL);
        print("RSS parsing result ok: %s\n", result.ok ? "true" : "false");

        var feed = feed_from(result, "RSS");
        if (feed == null) {
            return;
        }

        // Diagnostics that make a failing run easier to read.
        print("Feed title: '%s'\n", feed.title);
        print("Feed link: '%s'\n", feed.link);
        print("Feed description: '%s'\n", feed.description);
        print("Items length: %d\n", feed.items.length);
        if (feed.items.length > 0) {
            print("First item title: '%s'\n", feed.items[0].title);
        }
        if (feed.items.length > 1) {
            print("Second item title: '%s'\n", feed.items[1].title);
        }

        if (!expect_string("title", feed.title, "Test Feed")) {
            return;
        }
        if (!expect_string("link", feed.link, "https://example.com")) {
            return;
        }
        if (!expect_string("description", feed.description, "A test RSS feed")) {
            return;
        }
        if (!expect_item_count(feed, 2)) {
            return;
        }
        if (!expect_string("first item title", feed.items[0].title, "First Post")) {
            return;
        }
        if (!expect_string("second item title", feed.items[1].title, "Second Post")) {
            return;
        }
        print("PASS: test_rss_parsing\n");
    }

    public void test_atom_parsing() {
        var atom_content = """<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<title>Test Atom Feed</title>
<subtitle>A test Atom feed</subtitle>
<link href="https://example.com" rel="alternate"/>
<link href="https://example.com/feed.xml" rel="self"/>
<updated>2024-01-01T12:00:00Z</updated>
<id>urn:uuid:feed-123</id>
<entry>
<title>First Entry</title>
<link href="https://example.com/entry1" rel="alternate"/>
<summary>This is the first entry</summary>
<updated>2024-01-01T12:00:00Z</updated>
<published>2024-01-01T12:00:00Z</published>
<id>urn:uuid:entry-1</id>
<author>
<name>Test Author</name>
</author>
</entry>
<entry>
<title>Second Entry</title>
<link href="https://example.com/entry2" rel="alternate"/>
<summary>This is the second entry</summary>
<updated>2024-01-02T12:00:00Z</updated>
<published>2024-01-02T12:00:00Z</published>
<id>urn:uuid:entry-2</id>
</entry>
</feed>""";
        var parser = new FeedParser();
        var feed = feed_from(parser.parse(atom_content, FEED_URL), "Atom");
        if (feed == null) {
            return;
        }

        if (!expect_string("title", feed.title, "Test Atom Feed")) {
            return;
        }
        if (!expect_string("link", feed.link, "https://example.com")) {
            return;
        }
        if (!expect_string("subtitle", feed.subtitle, "A test Atom feed")) {
            return;
        }
        if (!expect_item_count(feed, 2)) {
            return;
        }
        if (!expect_string("first item title", feed.items[0].title, "First Entry")) {
            return;
        }
        if (!expect_string("first item author", feed.items[0].author, "Test Author")) {
            return;
        }
        if (!expect_string("first item description", feed.items[0].description, "This is the first entry")) {
            return;
        }
        print("PASS: test_atom_parsing\n");
    }

    public void test_feed_type_detection() {
        var parser = new FeedParser();

        var rss_content = """<?xml version="1.0"?><rss version="2.0"><channel><title>Test</title></channel></rss>""";
        var type = parser.detect_feed_type(rss_content);
        if (type != FeedType.RSS_2_0) {
            fail("Expected RSS 2.0, got %s".printf(type.to_string()));
            return;
        }

        var atom_content = """<?xml version="1.0"?><feed xmlns="http://www.w3.org/2005/Atom"><title>Test</title></feed>""";
        type = parser.detect_feed_type(atom_content);
        if (type != FeedType.ATOM) {
            fail("Expected Atom, got %s".printf(type.to_string()));
            return;
        }

        var rdf_content = """<?xml version="1.0"?><RDF xmlns="http://www.w3.org/1999/02/22-rdf-syntax-ns#"><channel><title>Test</title></channel></RDF>""";
        type = parser.detect_feed_type(rdf_content);
        if (type != FeedType.RSS_1_0) {
            fail("Expected RSS 1.0, got %s".printf(type.to_string()));
            return;
        }
        print("PASS: test_feed_type_detection\n");
    }

    public void test_malformed_xml() {
        var parser = new FeedParser();

        var result = parser.parse("not xml at all", FEED_URL);
        if (result.ok) {
            fail("Expected parsing to fail for malformed XML");
            return;
        }

        result = parser.parse("<rss><channel>", FEED_URL);
        if (result.ok) {
            fail("Expected parsing to fail for incomplete XML");
            return;
        }
        print("PASS: test_malformed_xml\n");
    }

    public void test_itunes_namespace() {
        var rss_content = """<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd">
<channel>
<title>Podcast Feed</title>
<link>https://example.com</link>
<itunes:author>Podcast Author</itunes:author>
<itunes:summary>A podcast feed</itunes:summary>
<item>
<title>Episode 1</title>
<link>https://example.com/episode1</link>
<description>Episode summary</description>
<itunes:author>Episode Author</itunes:author>
<enclosure url="https://example.com/episode1.mp3" type="audio/mpeg" length="12345678"/>
</item>
</channel>
</rss>""";
        var parser = new FeedParser();
        var feed = feed_from(parser.parse(rss_content, FEED_URL), "iTunes");
        if (feed == null) {
            return;
        }

        if (!expect_item_count(feed, 1)) {
            return;
        }
        if (!expect_string("author", feed.items[0].author, "Episode Author")) {
            return;
        }
        if (!expect_string("description", feed.items[0].description, "Episode summary")) {
            return;
        }
        print("PASS: test_itunes_namespace\n");
    }

    public void test_enclosures() {
        var rss_content = """<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>Enclosure Test</title>
<link>https://example.com</link>
<item>
<title>Post with Enclosure</title>
<link>https://example.com/post</link>
<enclosure url="https://example.com/file.mp3" type="audio/mpeg" length="12345678"/>
</item>
<item>
<title>Post without Enclosure</title>
<link>https://example.com/post2</link>
</item>
</channel>
</rss>""";
        var parser = new FeedParser();
        var feed = feed_from(parser.parse(rss_content, FEED_URL), "Enclosure");
        if (feed == null) {
            return;
        }

        if (!expect_item_count(feed, 2)) {
            return;
        }
        if (!expect_string("enclosure_url", feed.items[0].enclosure_url, "https://example.com/file.mp3")) {
            return;
        }
        if (!expect_string("enclosure_type", feed.items[0].enclosure_type, "audio/mpeg")) {
            return;
        }
        if (!expect_string("enclosure_length", feed.items[0].enclosure_length, "12345678")) {
            return;
        }
        if (feed.items[1].enclosure_url != null) {
            fail("Expected no enclosure for second item");
            return;
        }
        print("PASS: test_enclosures\n");
    }
}