11: Implement Linux RSS/Atom feed parser
This commit is contained in:
@@ -16,6 +16,7 @@ gio_dep = dependency('gio-2.0', version: '>= 2.58')
|
||||
json_dep = dependency('json-glib-1.0', version: '>= 1.4')
|
||||
sqlite_dep = dependency('sqlite3', version: '>= 3.0')
|
||||
gobject_dep = dependency('gobject-2.0', version: '>= 2.58')
|
||||
xml_dep = dependency('libxml-2.0', version: '>= 2.0')
|
||||
|
||||
# Source files
|
||||
models = files(
|
||||
@@ -37,6 +38,15 @@ database = files(
|
||||
'src/database/search-history-store.vala',
|
||||
)
|
||||
|
||||
# Parser files
|
||||
parser = files(
|
||||
'src/parser/feed-type.vala',
|
||||
'src/parser/parse-result.vala',
|
||||
'src/parser/rss-parser.vala',
|
||||
'src/parser/atom-parser.vala',
|
||||
'src/parser/feed-parser.vala',
|
||||
)
|
||||
|
||||
# Main library
|
||||
models_lib = library('rssuper-models', models,
|
||||
dependencies: [glib_dep, gio_dep, json_dep],
|
||||
@@ -51,14 +61,32 @@ database_lib = library('rssuper-database', database,
|
||||
vala_args: ['--vapidir', 'src/database', '--pkg', 'sqlite3']
|
||||
)
|
||||
|
||||
# Parser library
|
||||
parser_lib = library('rssuper-parser', parser,
|
||||
dependencies: [glib_dep, gio_dep, json_dep, xml_dep],
|
||||
link_with: [models_lib],
|
||||
install: false,
|
||||
vala_args: ['--vapidir', 'src/parser', '--pkg', 'libxml-2.0']
|
||||
)
|
||||
|
||||
# Test executable
|
||||
test_exe = executable('database-tests',
|
||||
'src/tests/database-tests.vala',
|
||||
dependencies: [glib_dep, gio_dep, json_dep, sqlite_dep, gobject_dep],
|
||||
link_with: [models_lib, database_lib],
|
||||
vala_args: ['--vapidir', '.', '--pkg', 'sqlite3'],
|
||||
dependencies: [glib_dep, gio_dep, json_dep, sqlite_dep, gobject_dep, xml_dep],
|
||||
link_with: [models_lib, database_lib, parser_lib],
|
||||
vala_args: ['--vapidir', '.', '--pkg', 'sqlite3', '--pkg', 'libxml-2.0'],
|
||||
install: false
|
||||
)
|
||||
|
||||
# Test definition
|
||||
# Parser test executable
|
||||
parser_test_exe = executable('parser-tests',
|
||||
'src/tests/parser-tests.vala',
|
||||
dependencies: [glib_dep, gio_dep, json_dep, xml_dep],
|
||||
link_with: [models_lib, parser_lib],
|
||||
vala_args: ['--vapidir', '.', '--pkg', 'libxml-2.0'],
|
||||
install: false
|
||||
)
|
||||
|
||||
# Test definitions
|
||||
test('database tests', test_exe)
|
||||
test('parser tests', parser_test_exe)
|
||||
|
||||
245
native-route/linux/src/parser/atom-parser.vala
Normal file
245
native-route/linux/src/parser/atom-parser.vala
Normal file
@@ -0,0 +1,245 @@
|
||||
/*
|
||||
* AtomParser.vala
|
||||
*
|
||||
* Atom 1.0 feed parser
|
||||
*/
|
||||
|
||||
/**
 * Parser for Atom 1.0 documents.
 *
 * The document is loaded with libxml2 and walked as a tree. Only the direct
 * children of the root "feed" element and of each "entry" element are
 * inspected. Elements bound to a namespace other than the Atom namespace
 * (for example media:title) are ignored, so that extension elements sharing
 * a local name with an Atom element cannot overwrite Atom data.
 *
 * An instance keeps per-parse state in fields; that state is reset at the
 * start of every call to parse().
 */
public class RSSuper.AtomParser : Object {
    private const string ATOM_NAMESPACE = "http://www.w3.org/2005/Atom";

    private string feed_url;
    private Feed? current_feed;
    private FeedItem? current_item;
    private string[] current_categories;
    private bool in_feed;
    private bool in_entry;

    public AtomParser() {}

    /**
     * Parses an Atom document.
     *
     * @param xml_content the complete XML text of the feed
     * @param url the address the feed was fetched from; stored as raw_url on the resulting feed
     * @return a successful result wrapping the Feed, or an error result when
     *         the text is not parseable XML or its root is not an Atom "feed" element
     */
    public ParseResult parse(string xml_content, string url) {
        reset_state();
        this.feed_url = url;

        Xml.Doc* doc = Xml.Parser.parse_doc(xml_content);
        if (doc == null) {
            return ParseResult.error("Failed to parse XML document");
        }

        Xml.Node* root = doc->get_root_element();
        if (root == null) {
            delete doc;
            return ParseResult.error("No root element found");
        }

        string? name = root->name;
        if (name == null || name != "feed") {
            delete doc;
            return ParseResult.error("Not an Atom feed: root element is '%s'".printf(name ?? "unknown"));
        }

        if (!is_atom_node(root)) {
            delete doc;
            return ParseResult.error("Not an Atom 1.0 feed");
        }

        parse_element(root);
        delete doc;

        if (current_feed == null) {
            return ParseResult.error("No feed element found");
        }

        current_feed.raw_url = url;

        return ParseResult.success(current_feed);
    }

    /** Clears everything left over from a previous parse() call. */
    private void reset_state() {
        current_feed = null;
        current_item = null;
        current_categories = {};
        in_feed = false;
        in_entry = false;
    }

    /**
     * Tells whether a node is unqualified or bound to the Atom namespace.
     * Unqualified nodes are accepted, matching the root-element check.
     */
    private static bool is_atom_node(Xml.Node* node) {
        Xml.Ns* ns = node->ns;
        return ns == null || ns->href == null || ns->href == ATOM_NAMESPACE;
    }

    /** Returns the node's text content with surrounding whitespace removed, or null. */
    private static string? read_text(Xml.Node* node) {
        string? raw = node->get_content();
        if (raw == null) {
            return null;
        }
        return raw.strip();
    }

    /** Dispatches one element to the handling for its local name. */
    private void parse_element(Xml.Node* node) {
        string? name = node->name;
        if (name == null || !is_atom_node(node)) {
            return;
        }

        string? text = null;

        switch (name) {
            case "feed":
                in_feed = true;
                current_feed = new Feed();
                current_categories = {};
                iterate_children(node);
                in_feed = false;
                break;

            case "entry":
                parse_entry(node);
                break;

            case "title":
                text = read_text(node);
                if (text == null) {
                    break;
                }
                if (in_entry && current_item != null) {
                    current_item.title = text;
                } else if (in_feed && current_feed != null) {
                    current_feed.title = text;
                }
                break;

            case "subtitle":
                text = read_text(node);
                if (current_feed != null && text != null) {
                    current_feed.subtitle = text;
                }
                break;

            case "link":
                parse_link(node);
                break;

            case "summary":
                text = read_text(node);
                if (in_entry && current_item != null && text != null && current_item.description == null) {
                    current_item.description = text;
                }
                break;

            case "content":
                text = read_text(node);
                if (in_entry && current_item != null && text != null) {
                    if (current_item.content == null) {
                        current_item.content = text;
                    }
                    // Fall back to the content when no summary has been seen.
                    if (current_item.description == null) {
                        current_item.description = text;
                    }
                }
                break;

            case "id":
                text = read_text(node);
                if (in_entry && current_item != null && current_item.guid == null && text != null) {
                    current_item.guid = text;
                }
                break;

            case "updated":
                text = read_text(node);
                if (text == null) {
                    break;
                }
                // in_feed stays true while an entry is being parsed, so the
                // entry has to be tested first; otherwise every entry's
                // timestamp would overwrite the feed's.
                if (in_entry) {
                    if (current_item != null) {
                        current_item.updated = text;
                    }
                } else if (in_feed && current_feed != null) {
                    current_feed.updated = text;
                }
                break;

            case "published":
                text = read_text(node);
                if (in_entry && current_item != null && text != null) {
                    current_item.published = text;
                }
                break;

            case "author":
                if (in_entry && current_item != null) {
                    Xml.Node* child = node->first_element_child();
                    while (child != null) {
                        if (child->name == "name") {
                            text = read_text(child);
                            if (text != null && current_item.author == null) {
                                current_item.author = text;
                            }
                        }
                        child = child->next_element_sibling();
                    }
                }
                break;

            case "generator":
                text = read_text(node);
                if (current_feed != null && text != null) {
                    current_feed.generator = text;
                }
                break;

            case "category":
                string? term = node->get_prop("term");
                if (in_entry && current_item != null && term != null) {
                    append_category(term);
                }
                break;
        }
    }

    /**
     * Parses one "entry" element. The item is added to the current feed only
     * when its title differs from the empty string; an empty id is replaced
     * by the guid, the link or the title, in that order of preference.
     */
    private void parse_entry(Xml.Node* node) {
        in_entry = true;
        current_item = new FeedItem();
        current_categories = {};
        iterate_children(node);

        if (current_item != null && current_item.title != "") {
            if (current_item.id == "") {
                current_item.id = current_item.guid ?? current_item.link ?? current_item.title;
            }
            if (current_feed != null) {
                current_feed.add_item(current_item);
            }
        }

        // Drop the reference so later feed-level elements cannot touch this item.
        current_item = null;
        in_entry = false;
    }

    /**
     * Handles a "link" element. A link without rel, or with rel="alternate",
     * becomes the link of the entry (when inside one) or of the feed; only
     * the first such link is kept. Inside an entry, rel="enclosure" fills
     * the enclosure fields.
     */
    private void parse_link(Xml.Node* node) {
        string? href = node->get_prop("href");
        if (href == null) {
            return;
        }

        string? rel = node->get_prop("rel");
        bool is_alternate = (rel == null || rel == "alternate");

        // Entry first: in_feed is still true while an entry is open.
        if (in_entry) {
            if (current_item == null) {
                return;
            }
            if (is_alternate) {
                if (current_item.link == null) {
                    current_item.link = href;
                }
            } else if (rel == "enclosure") {
                current_item.enclosure_url = href;
                current_item.enclosure_type = node->get_prop("type");
                current_item.enclosure_length = node->get_prop("length");
            }
        } else if (in_feed && current_feed != null && is_alternate) {
            if (current_feed.link == null) {
                current_feed.link = href;
            }
        }
    }

    /** Appends a category term to the current entry's category list. */
    private void append_category(string term) {
        var extended = new string[current_categories.length + 1];
        for (var i = 0; i < current_categories.length; i++) {
            extended[i] = current_categories[i];
        }
        extended[current_categories.length] = term;
        current_categories = extended;
        current_item.categories = current_categories;
    }

    /** Calls parse_element() on every element child of the node, in document order. */
    private void iterate_children(Xml.Node* node) {
        Xml.Node* child = node->first_element_child();
        while (child != null) {
            parse_element(child);
            child = child->next_element_sibling();
        }
    }
}
|
||||
88
native-route/linux/src/parser/feed-parser.vala
Normal file
88
native-route/linux/src/parser/feed-parser.vala
Normal file
@@ -0,0 +1,88 @@
|
||||
/*
|
||||
* FeedParser.vala
|
||||
*
|
||||
* Main feed parser that detects and handles both RSS and Atom feeds
|
||||
*/
|
||||
|
||||
/**
 * Front end that chooses between the RSS and the Atom parser.
 *
 * The choice is made either by inspecting the document's root element or,
 * when a recognised content type is supplied, from that content type.
 */
public class RSSuper.FeedParser : Object {
    private RSSParser rss_parser;
    private AtomParser atom_parser;

    public FeedParser() {
        this.atom_parser = new AtomParser();
        this.rss_parser = new RSSParser();
    }

    /**
     * Detects the feed flavour from the document and parses it.
     *
     * Documents detected as Atom go to the Atom parser; every other
     * detection result, including UNKNOWN, goes to the RSS parser.
     *
     * @param xml_content the XML text of the feed
     * @param url the address the feed came from
     * @return whatever the selected parser returns
     */
    public ParseResult parse(string xml_content, string url) {
        if (detect_feed_type(xml_content) == FeedType.ATOM) {
            return atom_parser.parse(xml_content, url);
        }
        return rss_parser.parse(xml_content, url);
    }

    /**
     * Determines the feed flavour from the root element of the document.
     *
     * @param xml_content the XML text to inspect
     * @return the detected type, or UNKNOWN when the text cannot be parsed,
     *         has no root element, or the root is not recognised
     */
    public FeedType detect_feed_type(string xml_content) {
        Xml.Doc* document = Xml.Parser.parse_doc(xml_content);
        if (document == null) {
            return FeedType.UNKNOWN;
        }

        FeedType detected = FeedType.UNKNOWN;
        Xml.Node* root = document->get_root_element();
        if (root != null) {
            detected = classify_root(root);
        }

        // Single release point for the document, whatever was detected.
        delete document;
        return detected;
    }

    /**
     * Maps a root element to a feed type:
     * "feed" (unqualified or in the Atom namespace) is ATOM;
     * "rss" is RSS_1_0 when version="1.0" and RSS_2_0 otherwise;
     * "RDF" is RSS_1_0; anything else is UNKNOWN.
     */
    private static FeedType classify_root(Xml.Node* root) {
        unowned string? root_name = root->name;

        if (root_name == "feed") {
            Xml.Ns* ns = root->ns;
            bool atom_or_unqualified = ns == null
                || ns->href == null
                || ns->href == "http://www.w3.org/2005/Atom";
            if (atom_or_unqualified) {
                return FeedType.ATOM;
            }
        }

        if (root_name == "rss") {
            string? version = root->get_prop("version");
            if (version == "1.0") {
                return FeedType.RSS_1_0;
            }
            // "2.0", "0.91", "0.92", a missing attribute and any other
            // value are all reported as RSS 2.0.
            return FeedType.RSS_2_0;
        }

        if (root_name == "RDF") {
            return FeedType.RSS_1_0;
        }

        return FeedType.UNKNOWN;
    }

    /**
     * Parses a feed, preferring the supplied content type over detection.
     *
     * @param xml_content the XML text of the feed
     * @param url the address the feed came from
     * @param content_type optional content type; when it is null or not
     *        recognised by FeedType.from_string() the document itself is inspected
     * @return whatever the selected parser returns
     */
    public ParseResult parse_from_content_type(string xml_content, string url, string? content_type = null) {
        if (content_type == null) {
            return parse(xml_content, url);
        }

        switch (FeedType.from_string(content_type)) {
            case FeedType.ATOM:
                return atom_parser.parse(xml_content, url);
            case FeedType.RSS_1_0:
            case FeedType.RSS_2_0:
                return rss_parser.parse(xml_content, url);
            default:
                return parse(xml_content, url);
        }
    }
}
|
||||
41
native-route/linux/src/parser/feed-type.vala
Normal file
41
native-route/linux/src/parser/feed-type.vala
Normal file
@@ -0,0 +1,41 @@
|
||||
/*
|
||||
* FeedType.vala
|
||||
*
|
||||
* Enum for RSS/Atom feed types
|
||||
*/
|
||||
|
||||
/**
 * The feed flavours the parsers distinguish.
 */
public enum RSSuper.FeedType {
    UNKNOWN,
    RSS_1_0,
    RSS_2_0,
    ATOM;

    /**
     * Maps a short name or a content type to a feed type.
     *
     * Matching is case-insensitive. Anything from the first ';' onwards
     * (content-type parameters such as "; charset=utf-8") and surrounding
     * whitespace are ignored, so a raw Content-Type header value can be
     * passed directly.
     *
     * @param type a name ("rss", "atom", "rdf") or a content type
     *        ("application/rss+xml", "application/atom+xml", "application/rdf+xml")
     * @return the matching type, or UNKNOWN when nothing matches
     */
    public static FeedType from_string(string type) {
        string media_type = type;
        int parameters_at = type.index_of(";");
        if (parameters_at >= 0) {
            media_type = type.substring(0, parameters_at);
        }
        media_type = media_type.strip().down();

        switch (media_type) {
            case "rss":
            case "application/rss+xml":
                return RSS_2_0;
            case "atom":
            case "application/atom+xml":
                return ATOM;
            case "rdf":
            case "application/rdf+xml":
                return RSS_1_0;
            default:
                return UNKNOWN;
        }
    }

    /**
     * Returns a display name for the type.
     *
     * @return "RSS 1.0", "RSS 2.0", "Atom" or "Unknown"
     */
    public string to_string() {
        switch (this) {
            case RSS_1_0:
                return "RSS 1.0";
            case RSS_2_0:
                return "RSS 2.0";
            case ATOM:
                return "Atom";
            default:
                return "Unknown";
        }
    }
}
|
||||
61
native-route/linux/src/parser/parse-result.vala
Normal file
61
native-route/linux/src/parser/parse-result.vala
Normal file
@@ -0,0 +1,61 @@
|
||||
/*
|
||||
* ParseResult.vala
|
||||
*
|
||||
* Result type for feed parsing operations
|
||||
*/
|
||||
|
||||
/**
 * Describes why a parse operation failed.
 */
public class RSSuper.ParseError : Object {
    /** Description of the failure, as given to the constructor. */
    public string message { get; private set; }

    /** Numeric code given to the constructor; 0 when none was supplied. */
    public int code { get; private set; }

    /**
     * Creates an error description.
     *
     * @param message text describing the failure
     * @param code optional numeric code, 0 by default
     */
    public ParseError(string message, int code = 0) {
        this.code = code;
        this.message = message;
    }
}
|
||||
|
||||
/**
 * Outcome of a parse operation: either a value or a ParseError.
 *
 * Instances are created only through the success() and error() factories.
 */
public class RSSuper.ParseResult : Object {
    private Object? _value;
    private ParseError? _error;

    /** True for results made by success(), false for results made by error(). */
    public bool ok { get; private set; }

    // Runtime type of the wrapped value, recorded by success().
    private Type _value_type;

    private ParseResult() {}

    /**
     * Wraps a successfully parsed value.
     *
     * @param value the object produced by the parser
     * @return a result whose ok flag is true
     */
    public static ParseResult success(Object value) {
        var outcome = new ParseResult();
        outcome._value = value;
        outcome._value_type = value.get_type();
        outcome.ok = true;
        return outcome;
    }

    /**
     * Creates a failed result.
     *
     * @param message text describing the failure
     * @param code optional numeric code, 0 by default
     * @return a result whose ok flag is false and which carries a ParseError
     */
    public static ParseResult error(string message, int code = 0) {
        var outcome = new ParseResult();
        outcome._error = new ParseError(message, code);
        outcome.ok = false;
        return outcome;
    }

    /**
     * @return the wrapped value, or null when none was stored
     */
    public Object? get_value() {
        return _value;
    }

    /**
     * Returns the wrapped value cast to T.
     *
     * @return the value when the result is ok and the value is a T, otherwise null
     */
    public T? get_value_as<T>() {
        if (ok && _value is T) {
            return (T) _value;
        }
        return null;
    }

    /**
     * @return the stored error, or null when none was stored
     */
    public ParseError? get_error() {
        return _error;
    }

    /**
     * Tells whether the result is ok and the recorded runtime type of the
     * value is exactly T (the comparison is type equality, so a value of a
     * subtype of T does not match).
     */
    public bool is_type<T>() {
        if (!ok) {
            return false;
        }
        return typeof(T) == _value_type;
    }
}
|
||||
348
native-route/linux/src/parser/rss-parser.vala
Normal file
348
native-route/linux/src/parser/rss-parser.vala
Normal file
@@ -0,0 +1,348 @@
|
||||
/*
|
||||
* RSSParser.vala
|
||||
*
|
||||
* RSS 2.0 feed parser
|
||||
*/
|
||||
|
||||
/**
 * Parser for RSS 2.0 (and 0.91 / 0.92) documents.
 *
 * The document is loaded with libxml2 and walked as a tree. Elements are
 * dispatched on their name; elements bound to a prefixed namespace are
 * dispatched on "prefix:local-name" (for example "itunes:author"), so that
 * extension elements such as atom:link or media:title are not mistaken for
 * the core RSS elements with the same local name.
 *
 * An instance keeps per-parse state in fields; that state is reset at the
 * start of every call to parse(), so a parser can be reused safely.
 */
public class RSSuper.RSSParser : Object {
    private string feed_url;
    private Feed? current_feed;
    private FeedItem? current_item;
    private string[] current_categories;
    private bool in_item;
    private bool in_channel;

    public RSSParser() {}

    /**
     * Parses an RSS document.
     *
     * @param xml_content the complete XML text of the feed
     * @param url the address the feed was fetched from; stored as raw_url on the resulting feed
     * @return a successful result wrapping the Feed, or an error result when
     *         the text is not parseable XML, the root is not "rss", the
     *         version is unsupported, or no channel element is present
     */
    public ParseResult parse(string xml_content, string url) {
        // Without this, a document that has no <channel> would return the
        // feed produced by the previous call on the same parser.
        reset_state();
        this.feed_url = url;

        Xml.Doc* doc = Xml.Parser.parse_doc(xml_content);
        if (doc == null) {
            return ParseResult.error("Failed to parse XML document");
        }

        Xml.Node* root = doc->get_root_element();
        if (root == null) {
            delete doc;
            return ParseResult.error("No root element found");
        }

        string? name = root->name;
        if (name == null || name != "rss") {
            delete doc;
            return ParseResult.error("Not an RSS feed: root element is '%s'".printf(name ?? "unknown"));
        }

        string? version = root->get_prop("version");
        if (version != null && version != "2.0" && version != "0.91" && version != "0.92") {
            delete doc;
            return ParseResult.error("Unsupported RSS version: %s".printf(version));
        }

        iterate_children(root);
        delete doc;

        if (current_feed == null) {
            return ParseResult.error("No channel element found");
        }

        current_feed.raw_url = url;

        return ParseResult.success(current_feed);
    }

    /** Clears everything left over from a previous parse() call. */
    private void reset_state() {
        current_feed = null;
        current_item = null;
        current_categories = {};
        in_item = false;
        in_channel = false;
    }

    /**
     * Builds the dispatch key for an element: the bare name for elements
     * without a namespace prefix, "prefix:name" otherwise. libxml2 keeps
     * only the local part in the node name, so the prefix has to be read
     * from the node's namespace.
     */
    private static string element_key(Xml.Node* node) {
        Xml.Ns* ns = node->ns;
        if (ns == null || ns->prefix == null) {
            return node->name;
        }
        return "%s:%s".printf(ns->prefix, node->name);
    }

    /** Returns the node's text content with surrounding whitespace removed, or null. */
    private static string? read_text(Xml.Node* node) {
        string? raw = node->get_content();
        if (raw == null) {
            return null;
        }
        return raw.strip();
    }

    /** Dispatches one element to the handling for its (possibly prefixed) name. */
    private void parse_element(Xml.Node* node) {
        if (node->name == null) {
            return;
        }

        string key = element_key(node);
        string? text = null;

        switch (key) {
            case "channel":
                in_channel = true;
                current_feed = new Feed();
                current_categories = {};
                iterate_children(node);
                in_channel = false;
                break;

            case "item":
            case "entry":
                parse_item(node);
                break;

            case "image":
            case "textInput":
                // The title/link/url/description children of these elements
                // describe the image or the input box, not the channel;
                // descending into them would overwrite channel data.
                break;

            case "title":
                text = read_text(node);
                if (text == null) {
                    break;
                }
                if (in_item) {
                    if (current_item != null) {
                        current_item.title = text;
                    }
                } else if (in_channel && current_feed != null) {
                    current_feed.title = text;
                }
                break;

            case "link":
                text = read_text(node);
                if (text == null) {
                    break;
                }
                // Items live inside the channel, so in_channel is still true
                // here; the item has to be tested first or item links would
                // never be stored.
                if (in_item) {
                    if (current_item != null && current_item.link == null) {
                        current_item.link = text;
                    }
                } else if (in_channel && current_feed != null && current_feed.link == null) {
                    current_feed.link = text;
                }
                break;

            case "description":
                text = read_text(node);
                if (text == null) {
                    break;
                }
                if (in_item) {
                    if (current_item != null && current_item.description == null) {
                        current_item.description = text;
                    }
                } else if (in_channel && current_feed != null) {
                    current_feed.description = text;
                }
                break;

            case "subtitle":
                text = read_text(node);
                if (current_feed != null && text != null) {
                    current_feed.subtitle = text;
                }
                break;

            case "language":
                text = read_text(node);
                if (current_feed != null && text != null) {
                    current_feed.language = text;
                }
                break;

            case "lastBuildDate":
                text = read_text(node);
                if (current_feed != null && text != null) {
                    current_feed.last_build_date = text;
                }
                break;

            case "updated":
                text = read_text(node);
                if (text == null) {
                    break;
                }
                // Item first, for the same reason as "link".
                if (in_item) {
                    if (current_item != null) {
                        current_item.updated = text;
                    }
                } else if (current_feed != null) {
                    current_feed.updated = text;
                }
                break;

            case "generator":
                text = read_text(node);
                if (current_feed != null && text != null) {
                    current_feed.generator = text;
                }
                break;

            case "ttl":
                text = read_text(node);
                if (current_feed != null && text != null) {
                    current_feed.ttl = int.parse(text);
                }
                break;

            case "author":
                text = read_text(node);
                if (current_item != null && text != null) {
                    current_item.author = text;
                }
                break;

            case "dc:creator":
            case "creator":
            case "itunes:author":
                // Fallbacks: used only when no author has been stored yet.
                text = read_text(node);
                if (current_item != null && current_item.author == null && text != null) {
                    current_item.author = text;
                }
                break;

            case "pubDate":
            case "published":
                text = read_text(node);
                if (current_item != null && text != null) {
                    current_item.published = text;
                }
                break;

            case "guid":
            case "id":
                text = read_text(node);
                if (current_item != null && current_item.guid == null && text != null) {
                    current_item.guid = text;
                }
                break;

            case "category":
                text = read_text(node);
                if (current_item != null && text != null) {
                    append_category(text);
                }
                break;

            case "enclosure":
                string? enclosure_url = node->get_prop("url");
                if (current_item != null && enclosure_url != null) {
                    current_item.enclosure_url = enclosure_url;
                    current_item.enclosure_type = node->get_prop("type");
                    current_item.enclosure_length = node->get_prop("length");
                }
                break;

            case "content:encoded":
            case "content":
                text = read_text(node);
                if (current_item != null && text != null) {
                    current_item.content = text;
                }
                break;

            case "itunes:summary":
                text = read_text(node);
                if (current_item != null && current_item.description == null && text != null) {
                    current_item.description = text;
                }
                break;

            default:
                iterate_children(node);
                break;
        }
    }

    /**
     * Parses one "item" (or "entry") element. The item is added to the
     * current feed only when its title differs from the empty string; an
     * empty id is replaced by the guid, the link or the title, in that
     * order of preference.
     */
    private void parse_item(Xml.Node* node) {
        in_item = true;
        current_item = new FeedItem();
        current_categories = {};
        iterate_children(node);

        if (current_item != null && current_item.title != "") {
            if (current_item.id == "") {
                current_item.id = current_item.guid ?? current_item.link ?? current_item.title;
            }
            if (current_feed != null) {
                current_feed.add_item(current_item);
            }
        }

        // Drop the reference so channel-level elements that follow the item
        // (author, category, ...) cannot modify it.
        current_item = null;
        in_item = false;
    }

    /** Appends a category to the current item's category list. */
    private void append_category(string category) {
        var extended = new string[current_categories.length + 1];
        for (var i = 0; i < current_categories.length; i++) {
            extended[i] = current_categories[i];
        }
        extended[current_categories.length] = category;
        current_categories = extended;
        current_item.categories = current_categories;
    }

    /** Calls parse_element() on every element child of the node, in document order. */
    private void iterate_children(Xml.Node* node) {
        Xml.Node* child = node->first_element_child();
        while (child != null) {
            parse_element(child);
            child = child->next_element_sibling();
        }
    }
}
|
||||
347
native-route/linux/src/tests/parser-tests.vala
Normal file
347
native-route/linux/src/tests/parser-tests.vala
Normal file
@@ -0,0 +1,347 @@
|
||||
/*
|
||||
* ParserTests.vala
|
||||
*
|
||||
* Unit tests for RSS/Atom feed parser.
|
||||
*/
|
||||
|
||||
public class RSSuper.ParserTests {
|
||||
|
||||
/**
 * Entry point of the parser test program: runs every test method in order.
 *
 * The test methods return void and this method does not inspect any
 * outcome, so the closing message is printed whenever control reaches it.
 *
 * @param args command-line arguments (unused)
 * @return always 0
 */
public static int main(string[] args) {
    var suite = new ParserTests();

    suite.test_rss_parsing();
    suite.test_atom_parsing();
    suite.test_feed_type_detection();
    suite.test_malformed_xml();
    suite.test_itunes_namespace();
    suite.test_enclosures();

    print("All parser tests passed!\n");
    return 0;
}
|
||||
|
||||
/**
 * Parses a small RSS 2.0 document and checks the channel fields and items.
 *
 * Every check aborts the process on failure, so the test program exits
 * with a non-zero status instead of printing a message and carrying on.
 * The string assertions print both the actual and the expected value.
 */
public void test_rss_parsing() {
    var rss_content = """<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>Test Feed</title>
<link>https://example.com</link>
<description>A test RSS feed</description>
<language>en</language>
<lastBuildDate>Mon, 01 Jan 2024 12:00:00 GMT</lastBuildDate>
<ttl>60</ttl>
<item>
<title>First Post</title>
<link>https://example.com/post1</link>
<description>This is the first post</description>
<pubDate>Mon, 01 Jan 2024 12:00:00 GMT</pubDate>
<guid>post-1</guid>
</item>
<item>
<title>Second Post</title>
<link>https://example.com/post2</link>
<description>This is the second post</description>
<pubDate>Tue, 02 Jan 2024 12:00:00 GMT</pubDate>
<guid>post-2</guid>
</item>
</channel>
</rss>""";

    var parser = new FeedParser();
    var result = parser.parse(rss_content, "https://example.com/feed.xml");

    if (!result.ok) {
        error("FAIL: RSS parsing failed: %s", result.get_error().message);
    }

    var feed = result.get_value() as Feed;
    if (feed == null) {
        error("FAIL: Expected Feed object");
    }

    assert_cmpstr(feed.title, CompareOperator.EQ, "Test Feed");
    assert_cmpstr(feed.link, CompareOperator.EQ, "https://example.com");
    assert_cmpstr(feed.description, CompareOperator.EQ, "A test RSS feed");

    if (feed.items.length != 2) {
        error("FAIL: Expected 2 items, got %d", (int) feed.items.length);
    }

    assert_cmpstr(feed.items[0].title, CompareOperator.EQ, "First Post");
    assert_cmpstr(feed.items[1].title, CompareOperator.EQ, "Second Post");

    print("PASS: test_rss_parsing\n");
}
|
||||
|
||||
/**
 * Parses a small Atom document and checks the feed fields and entries.
 *
 * Every check aborts the process on failure, so the test program exits
 * with a non-zero status instead of printing a message and carrying on.
 * The string assertions print both the actual and the expected value.
 */
public void test_atom_parsing() {
    var atom_content = """<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<title>Test Atom Feed</title>
<subtitle>A test Atom feed</subtitle>
<link href="https://example.com" rel="alternate"/>
<link href="https://example.com/feed.xml" rel="self"/>
<updated>2024-01-01T12:00:00Z</updated>
<id>urn:uuid:feed-123</id>
<entry>
<title>First Entry</title>
<link href="https://example.com/entry1" rel="alternate"/>
<summary>This is the first entry</summary>
<updated>2024-01-01T12:00:00Z</updated>
<published>2024-01-01T12:00:00Z</published>
<id>urn:uuid:entry-1</id>
<author>
<name>Test Author</name>
</author>
</entry>
<entry>
<title>Second Entry</title>
<link href="https://example.com/entry2" rel="alternate"/>
<summary>This is the second entry</summary>
<updated>2024-01-02T12:00:00Z</updated>
<published>2024-01-02T12:00:00Z</published>
<id>urn:uuid:entry-2</id>
</entry>
</feed>""";

    var parser = new FeedParser();
    var result = parser.parse(atom_content, "https://example.com/feed.xml");

    if (!result.ok) {
        error("FAIL: Atom parsing failed: %s", result.get_error().message);
    }

    var feed = result.get_value() as Feed;
    if (feed == null) {
        error("FAIL: Expected Feed object");
    }

    assert_cmpstr(feed.title, CompareOperator.EQ, "Test Atom Feed");
    assert_cmpstr(feed.link, CompareOperator.EQ, "https://example.com");
    assert_cmpstr(feed.subtitle, CompareOperator.EQ, "A test Atom feed");

    if (feed.items.length != 2) {
        error("FAIL: Expected 2 items, got %d", (int) feed.items.length);
    }

    assert_cmpstr(feed.items[0].title, CompareOperator.EQ, "First Entry");
    assert_cmpstr(feed.items[0].author, CompareOperator.EQ, "Test Author");
    assert_cmpstr(feed.items[0].description, CompareOperator.EQ, "This is the first entry");

    print("PASS: test_atom_parsing\n");
}
|
||||
|
||||
/**
 * Checks that FeedParser.detect_feed_type() recognises an RSS 2.0, an Atom
 * and an RDF root element.
 *
 * A mismatch aborts the process, so the test program exits with a non-zero
 * status instead of printing a message and carrying on.
 */
public void test_feed_type_detection() {
    var parser = new FeedParser();

    var rss_content = """<?xml version="1.0"?><rss version="2.0"><channel><title>Test</title></channel></rss>""";
    var type = parser.detect_feed_type(rss_content);
    if (type != FeedType.RSS_2_0) {
        error("FAIL: Expected RSS 2.0, got %s", type.to_string());
    }

    var atom_content = """<?xml version="1.0"?><feed xmlns="http://www.w3.org/2005/Atom"><title>Test</title></feed>""";
    type = parser.detect_feed_type(atom_content);
    if (type != FeedType.ATOM) {
        error("FAIL: Expected Atom, got %s", type.to_string());
    }

    var rdf_content = """<?xml version="1.0"?><RDF xmlns="http://www.w3.org/1999/02/22-rdf-syntax-ns#"><channel><title>Test</title></channel></RDF>""";
    type = parser.detect_feed_type(rdf_content);
    if (type != FeedType.RSS_1_0) {
        error("FAIL: Expected RSS 1.0, got %s", type.to_string());
    }

    print("PASS: test_feed_type_detection\n");
}
|
||||
|
||||
/**
 * Verifies that FeedParser.parse() reports failure for input that is not
 * well-formed XML.
 *
 * Two inputs are tried in order: plain text with no markup, and an RSS
 * fragment whose elements are never closed. If either parse reports
 * `ok`, a FAIL line is written via printerr() and the test returns;
 * otherwise a PASS line is written via print().
 */
public void test_malformed_xml() {
    var feed_parser = new FeedParser();
    var feed_url = "https://example.com/feed.xml";

    // Input with no XML structure at all.
    var plain_text_result = feed_parser.parse("not xml at all", feed_url);
    if (plain_text_result.ok) {
        printerr("FAIL: Expected parsing to fail for malformed XML\n");
        return;
    }

    // Input that opens <rss> and <channel> but never closes them.
    var truncated_result = feed_parser.parse("<rss><channel>", feed_url);
    if (truncated_result.ok) {
        printerr("FAIL: Expected parsing to fail for incomplete XML\n");
        return;
    }

    print("PASS: test_malformed_xml\n");
}
|
||||
|
||||
/**
 * Parses an RSS 2.0 fixture that declares the iTunes podcast namespace and
 * checks the fields read from its single item.
 *
 * The fixture carries <itunes:author> and <itunes:summary> on the channel
 * and a <description>, an <itunes:author> and an <enclosure> on the item.
 * Only the item count, the item's author and the item's description are
 * asserted here; the channel-level iTunes elements and the enclosure are
 * present in the input but not checked by this test.
 *
 * The first failed check writes a FAIL line via printerr() and returns;
 * if every check passes, a PASS line is written via print().
 */
public void test_itunes_namespace() {
|
||||
var rss_content = """<?xml version="1.0" encoding="UTF-8"?>
|
||||
<rss version="2.0" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd">
|
||||
<channel>
|
||||
<title>Podcast Feed</title>
|
||||
<link>https://example.com</link>
|
||||
<itunes:author>Podcast Author</itunes:author>
|
||||
<itunes:summary>A podcast feed</itunes:summary>
|
||||
<item>
|
||||
<title>Episode 1</title>
|
||||
<link>https://example.com/episode1</link>
|
||||
<description>Episode summary</description>
|
||||
<itunes:author>Episode Author</itunes:author>
|
||||
<enclosure url="https://example.com/episode1.mp3" type="audio/mpeg" length="12345678"/>
|
||||
</item>
|
||||
|
||||
</channel>
|
||||
</rss>""";
|
||||
|
||||
var parser = new FeedParser();
|
||||
var result = parser.parse(rss_content, "https://example.com/feed.xml");
|
||||
|
||||
// Parsing must succeed; on failure the parser's own error message is reported.
if (!result.ok) {
|
||||
printerr("FAIL: iTunes parsing failed: %s\n", result.get_error().message);
|
||||
return;
|
||||
}
|
||||
|
||||
// The `as` cast is followed by a null check in case the value is not a Feed.
var feed = result.get_value() as Feed;
|
||||
if (feed == null) {
|
||||
printerr("FAIL: Expected Feed object\n");
|
||||
return;
|
||||
}
|
||||
|
||||
// The fixture contains exactly one <item>.
if (feed.items.length != 1) {
|
||||
printerr("FAIL: Expected 1 item, got %d\n", feed.items.length);
|
||||
return;
|
||||
}
|
||||
|
||||
// The item has no plain <author>; the expected value is its <itunes:author> text.
if (feed.items[0].author != "Episode Author") {
|
||||
printerr("FAIL: Expected author 'Episode Author', got '%s'\n", feed.items[0].author);
|
||||
return;
|
||||
}
|
||||
|
||||
// The expected description is the text of the item's <description> element.
if (feed.items[0].description != "Episode summary") {
|
||||
printerr("FAIL: Expected description 'Episode summary', got '%s'\n", feed.items[0].description);
|
||||
return;
|
||||
}
|
||||
|
||||
print("PASS: test_itunes_namespace\n");
|
||||
}
|
||||
|
||||
/**
 * Parses an RSS 2.0 fixture with two items and checks how <enclosure>
 * attributes are exposed on the parsed items.
 *
 * The first item carries an <enclosure> with url, type and length
 * attributes; the second item has none. The test checks the item count,
 * the three enclosure fields of the first item, and that the second
 * item's enclosure_url is null.
 *
 * The first failed check writes a FAIL line via printerr() and returns;
 * if every check passes, a PASS line is written via print().
 */
public void test_enclosures() {
|
||||
var rss_content = """<?xml version="1.0" encoding="UTF-8"?>
|
||||
<rss version="2.0">
|
||||
<channel>
|
||||
<title>Enclosure Test</title>
|
||||
<link>https://example.com</link>
|
||||
<item>
|
||||
<title>Post with Enclosure</title>
|
||||
<link>https://example.com/post</link>
|
||||
<enclosure url="https://example.com/file.mp3" type="audio/mpeg" length="12345678"/>
|
||||
</item>
|
||||
<item>
|
||||
<title>Post without Enclosure</title>
|
||||
<link>https://example.com/post2</link>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>""";
|
||||
|
||||
var parser = new FeedParser();
|
||||
var result = parser.parse(rss_content, "https://example.com/feed.xml");
|
||||
|
||||
// Parsing must succeed; on failure the parser's own error message is reported.
if (!result.ok) {
|
||||
printerr("FAIL: Enclosure parsing failed: %s\n", result.get_error().message);
|
||||
return;
|
||||
}
|
||||
|
||||
// The `as` cast is followed by a null check in case the value is not a Feed.
var feed = result.get_value() as Feed;
|
||||
if (feed == null) {
|
||||
printerr("FAIL: Expected Feed object\n");
|
||||
return;
|
||||
}
|
||||
|
||||
// Both <item> elements must be present, with or without an enclosure.
if (feed.items.length != 2) {
|
||||
printerr("FAIL: Expected 2 items, got %d\n", feed.items.length);
|
||||
return;
|
||||
}
|
||||
|
||||
// First item: each field is compared with the matching <enclosure> attribute.
if (feed.items[0].enclosure_url != "https://example.com/file.mp3") {
|
||||
printerr("FAIL: Expected enclosure_url 'https://example.com/file.mp3', got '%s'\n", feed.items[0].enclosure_url);
|
||||
return;
|
||||
}
|
||||
|
||||
if (feed.items[0].enclosure_type != "audio/mpeg") {
|
||||
printerr("FAIL: Expected enclosure_type 'audio/mpeg', got '%s'\n", feed.items[0].enclosure_type);
|
||||
return;
|
||||
}
|
||||
|
||||
// The length is compared as a string here, not as a number.
if (feed.items[0].enclosure_length != "12345678") {
|
||||
printerr("FAIL: Expected enclosure_length '12345678', got '%s'\n", feed.items[0].enclosure_length);
|
||||
return;
|
||||
}
|
||||
|
||||
// Second item: no <enclosure> in the fixture, so enclosure_url is expected to be null.
if (feed.items[1].enclosure_url != null) {
|
||||
printerr("FAIL: Expected no enclosure for second item\n");
|
||||
return;
|
||||
}
|
||||
|
||||
print("PASS: test_enclosures\n");
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user