11: Implement Linux RSS/Atom feed parser
This commit is contained in:
245
native-route/linux/src/parser/atom-parser.vala
Normal file
245
native-route/linux/src/parser/atom-parser.vala
Normal file
@@ -0,0 +1,245 @@
|
||||
/*
|
||||
* AtomParser.vala
|
||||
*
|
||||
* Atom 1.0 feed parser
|
||||
*/
|
||||
|
||||
/**
 * Parser for Atom 1.0 feeds.
 *
 * The document is parsed with libxml2 and then walked element by element.
 * Elements are matched by local name only; the root element must be
 * ``feed``. Only the direct children of ``feed`` and ``entry`` are
 * inspected (plus the ``name`` child of an entry's ``author``).
 *
 * An instance keeps per-parse state in private fields, so a single instance
 * must not be used for two parses at the same time.
 */
public class RSSuper.AtomParser : Object {
    private const string ATOM_NAMESPACE = "http://www.w3.org/2005/Atom";

    // URL passed to the most recent parse() call; stored but not read by
    // the parser itself.
    private string feed_url;
    // Feed being built by the current parse() call.
    private Feed? current_feed;
    // Entry being built; non-null only while inside an <entry> element.
    private FeedItem? current_item;
    // Category terms collected so far for the current entry.
    private string[] current_categories;
    // True while the children of <feed> are being walked. It stays true
    // while an <entry> is processed, so handlers must test in_entry first.
    private bool in_feed;
    // True while the children of an <entry> are being walked.
    private bool in_entry;

    public AtomParser() {}

    /**
     * Parses an Atom document.
     *
     * @param xml_content the raw XML text of the feed
     * @param url the URL the feed was fetched from; stored on the resulting
     *            feed as ``raw_url``
     * @return ``ParseResult.success`` wrapping the parsed feed, or
     *         ``ParseResult.error`` with a message when the text is not
     *         well-formed XML, has no root element, or is not an Atom feed
     */
    public ParseResult parse(string xml_content, string url) {
        this.feed_url = url;
        // Start from a clean slate so a reused parser instance never leaks
        // state from a previous document into this one.
        reset_state();

        // NOTE(review): feed content is untrusted input. Consider switching
        // to Xml.Parser.read_memory() with Xml.ParserOption.NONET so the
        // parser options are explicit -- confirm against project policy.
        Xml.Doc* doc = Xml.Parser.parse_doc(xml_content);
        if (doc == null) {
            return ParseResult.error("Failed to parse XML document");
        }

        Xml.Node* root = doc->get_root_element();
        if (root == null) {
            delete doc;
            return ParseResult.error("No root element found");
        }

        string? name = root->name;
        if (name == null || name != "feed") {
            // Build the message before the document (and its nodes) is freed.
            var message = "Not an Atom feed: root element is '%s'".printf(name ?? "unknown");
            delete doc;
            return ParseResult.error(message);
        }

        // A missing namespace is tolerated; a present but different one is not.
        Xml.Ns* ns = root->ns;
        if (ns != null && ns->href != null && ns->href != ATOM_NAMESPACE) {
            delete doc;
            return ParseResult.error("Not an Atom 1.0 feed");
        }

        parse_element(root);
        delete doc;

        if (current_feed == null) {
            return ParseResult.error("No feed element found");
        }

        current_feed.raw_url = url;

        return ParseResult.success(current_feed);
    }

    /**
     * Clears all per-parse state.
     */
    private void reset_state() {
        current_feed = null;
        current_item = null;
        current_categories = {};
        in_feed = false;
        in_entry = false;
    }

    /**
     * Returns the text content of //node// with surrounding whitespace
     * removed, or ``null`` when libxml2 reports no content.
     */
    private static string? read_text(Xml.Node* node) {
        string? text = node->get_content();
        if (text == null) {
            return null;
        }
        return text.strip();
    }

    /**
     * Dispatches a single element to the handler for its local name.
     * Unknown elements are ignored and not descended into.
     */
    private void parse_element(Xml.Node* node) {
        string? name = node->name;
        if (name == null) {
            return;
        }

        switch (name) {
            case "feed":
                parse_feed(node);
                break;

            case "entry":
                parse_entry(node);
                break;

            case "title":
                var text = read_text(node);
                if (text == null) {
                    break;
                }
                if (in_entry && current_item != null) {
                    current_item.title = text;
                } else if (in_feed && current_feed != null) {
                    current_feed.title = text;
                }
                break;

            case "subtitle":
                // Feed-level metadata only: a same-named extension element
                // inside an entry must not overwrite the feed's subtitle.
                var text = read_text(node);
                if (!in_entry && current_feed != null && text != null) {
                    current_feed.subtitle = text;
                }
                break;

            case "link":
                parse_link(node);
                break;

            case "summary":
                var text = read_text(node);
                if (in_entry && current_item != null && text != null) {
                    if (current_item.description == null) {
                        current_item.description = text;
                    }
                }
                break;

            case "content":
                var text = read_text(node);
                if (in_entry && current_item != null && text != null) {
                    if (current_item.content == null) {
                        current_item.content = text;
                    }
                    // Content doubles as the description when no summary
                    // has been seen yet.
                    if (current_item.description == null) {
                        current_item.description = text;
                    }
                }
                break;

            case "id":
                var text = read_text(node);
                if (in_entry && current_item != null && current_item.guid == null && text != null) {
                    current_item.guid = text;
                }
                break;

            case "updated":
                var text = read_text(node);
                if (text == null) {
                    break;
                }
                // in_feed is still true inside an entry, so the entry check
                // has to come first; otherwise every entry timestamp would
                // overwrite the feed's.
                if (in_entry) {
                    if (current_item != null) {
                        current_item.updated = text;
                    }
                } else if (in_feed && current_feed != null) {
                    current_feed.updated = text;
                }
                break;

            case "published":
                var text = read_text(node);
                if (in_entry && current_item != null && text != null) {
                    current_item.published = text;
                }
                break;

            case "author":
                parse_author(node);
                break;

            case "generator":
                // Feed-level metadata only (see "subtitle").
                var text = read_text(node);
                if (!in_entry && current_feed != null && text != null) {
                    current_feed.generator = text;
                }
                break;

            case "category":
                parse_category(node);
                break;
        }
    }

    /**
     * Handles the ``feed`` element: creates a fresh feed and walks its
     * children.
     */
    private void parse_feed(Xml.Node* node) {
        if (in_feed) {
            // A nested <feed> would otherwise throw away everything
            // collected so far.
            return;
        }

        in_feed = true;
        current_feed = new Feed();
        current_categories = {};
        iterate_children(node);
        in_feed = false;
    }

    /**
     * Handles an ``entry`` element: builds a feed item from its children
     * and appends it to the current feed.
     */
    private void parse_entry(Xml.Node* node) {
        if (in_entry) {
            // A nested <entry> would otherwise replace the item being built.
            return;
        }

        in_entry = true;
        current_item = new FeedItem();
        current_categories = {};
        iterate_children(node);

        // Items whose title is the empty string are dropped.
        if (current_item != null && current_item.title != "") {
            // Fall back to guid, then link, then title when the id is the
            // empty string.
            if (current_item.id == "") {
                current_item.id = current_item.guid ?? current_item.link ?? current_item.title;
            }
            if (current_feed != null) {
                current_feed.add_item(current_item);
            }
        }

        // Clear the item so later feed-level elements cannot touch it.
        current_item = null;
        in_entry = false;
    }

    /**
     * Handles a ``link`` element. Links without ``rel`` or with
     * ``rel="alternate"`` set the link of the entry or feed (first one
     * wins); ``rel="enclosure"`` inside an entry records the enclosure.
     */
    private void parse_link(Xml.Node* node) {
        var href = node->get_prop("href");
        if (href == null) {
            return;
        }

        var rel = node->get_prop("rel");
        bool is_alternate = (rel == null || rel == "alternate");

        // Entry context must be tested first: in_feed is also true while an
        // entry is being processed.
        if (in_entry) {
            if (current_item == null) {
                return;
            }
            if (is_alternate) {
                if (current_item.link == null) {
                    current_item.link = href;
                }
            } else if (rel == "enclosure") {
                current_item.enclosure_url = href;
                current_item.enclosure_type = node->get_prop("type");
                current_item.enclosure_length = node->get_prop("length");
            }
        } else if (in_feed && current_feed != null && is_alternate) {
            if (current_feed.link == null) {
                current_feed.link = href;
            }
        }
    }

    /**
     * Handles an entry's ``author`` element by taking the text of its first
     * ``name`` child as the item author. Feed-level authors are ignored.
     */
    private void parse_author(Xml.Node* node) {
        if (!in_entry || current_item == null) {
            return;
        }

        Xml.Node* child = node->first_element_child();
        while (child != null) {
            string? child_name = child->name;
            if (child_name == "name") {
                var text = read_text(child);
                if (text != null && current_item.author == null) {
                    current_item.author = text;
                }
            }
            child = child->next_element_sibling();
        }
    }

    /**
     * Handles an entry's ``category`` element by appending its ``term``
     * attribute to the item's categories. Feed-level categories are ignored.
     */
    private void parse_category(Xml.Node* node) {
        if (!in_entry || current_item == null) {
            return;
        }

        var term = node->get_prop("term");
        if (term == null) {
            return;
        }

        current_categories += term;
        current_item.categories = current_categories;
    }

    /**
     * Calls parse_element() on every direct element child of //node//.
     */
    private void iterate_children(Xml.Node* node) {
        Xml.Node* child = node->first_element_child();
        while (child != null) {
            parse_element(child);
            child = child->next_element_sibling();
        }
    }
}
|
||||
Reference in New Issue
Block a user