Files
RSSuper/native-route/linux/src/parser/atom-parser.vala

246 lines
8.7 KiB
Vala

/*
* AtomParser.vala
*
* Atom 1.0 feed parser
*/
/**
 * Parser for Atom 1.0 feeds.
 *
 * The document is parsed with libxml2 and walked one level at a time:
 * the children of the root ''feed'' element are dispatched by element
 * name, and the children of each ''entry'' element are dispatched the
 * same way. Other nested elements are not recursed into, except for the
 * ''name'' child of an entry's ''author''.
 *
 * The parser keeps per-document state in private fields, so a single
 * instance must not be used for two parses at the same time.
 */
public class RSSuper.AtomParser : Object {
    /* Namespace URI that identifies an Atom 1.0 document. */
    private const string ATOM_NAMESPACE = "http://www.w3.org/2005/Atom";

    private string feed_url;
    private Feed? current_feed;
    private FeedItem? current_item;
    private string[] current_categories;
    private bool in_feed;
    private bool in_entry;

    public AtomParser() {}

    /**
     * Parses an Atom document.
     *
     * @param xml_content the raw XML text of the feed
     * @param url the URL the feed was fetched from; stored on the
     *            resulting feed as its raw URL
     * @return a successful result wrapping the parsed feed, or an error
     *         result when the XML cannot be parsed, has no root element,
     *         or the root is not an Atom ''feed'' element
     */
    public ParseResult parse(string xml_content, string url) {
        this.feed_url = url;

        // Start from a clean slate so a reused parser never leaks state
        // from a previous document into this one.
        current_feed = null;
        current_item = null;
        current_categories = {};
        in_feed = false;
        in_entry = false;

        // NOTE(review): feed content is untrusted input; confirm that the
        // default libxml2 parser options are acceptable here.
        Xml.Doc* doc = Xml.Parser.parse_doc(xml_content);
        if (doc == null) {
            return ParseResult.error("Failed to parse XML document");
        }

        Xml.Node* root = doc->get_root_element();
        if (root == null) {
            delete doc;
            return ParseResult.error("No root element found");
        }

        string? name = root->name;
        if (name == null || name != "feed") {
            delete doc;
            return ParseResult.error("Not an Atom feed: root element is '%s'".printf(name ?? "unknown"));
        }

        // A root without a namespace is tolerated; a root with a different
        // namespace is rejected.
        Xml.Ns* ns = root->ns;
        if (ns != null && ns->href != null && ns->href != ATOM_NAMESPACE) {
            delete doc;
            return ParseResult.error("Not an Atom 1.0 feed");
        }

        parse_element(root);
        delete doc;

        if (current_feed == null) {
            return ParseResult.error("No feed element found");
        }

        current_feed.raw_url = url;
        return ParseResult.success(current_feed);
    }

    /**
     * Dispatches a single element to its handler based on the element name.
     * Unknown elements are ignored.
     */
    private void parse_element(Xml.Node* node) {
        string? name = node->name;
        if (name == null) {
            return;
        }

        switch (name) {
            case "feed":
                handle_feed(node);
                break;
            case "entry":
                handle_entry(node);
                break;
            case "title":
                handle_title(node);
                break;
            case "subtitle":
                handle_subtitle(node);
                break;
            case "link":
                handle_link(node);
                break;
            case "summary":
                handle_summary(node);
                break;
            case "content":
                handle_content(node);
                break;
            case "id":
                handle_id(node);
                break;
            case "updated":
                handle_updated(node);
                break;
            case "published":
                handle_published(node);
                break;
            case "author":
                handle_author(node);
                break;
            case "generator":
                handle_generator(node);
                break;
            case "category":
                handle_category(node);
                break;
        }
    }

    /** Calls parse_element() on every element child of the given node. */
    private void iterate_children(Xml.Node* node) {
        for (Xml.Node* child = node->first_element_child();
             child != null;
             child = child->next_element_sibling()) {
            parse_element(child);
        }
    }

    /** Returns the node's text content with surrounding whitespace removed, or null. */
    private static string? read_text(Xml.Node* node) {
        string? text = node->get_content();
        if (text == null) {
            return null;
        }
        return text.strip();
    }

    /** Creates the feed object and processes the feed's children. */
    private void handle_feed(Xml.Node* node) {
        in_feed = true;
        current_feed = new Feed();
        current_categories = {};
        iterate_children(node);
        in_feed = false;
    }

    /** Builds one item from an entry element and adds it to the current feed. */
    private void handle_entry(Xml.Node* node) {
        in_entry = true;
        current_item = new FeedItem();
        current_categories = {};
        iterate_children(node);

        // Entries whose title is the empty string are dropped.
        if (current_item != null && current_item.title != "") {
            if (current_item.id == "") {
                current_item.id = current_item.guid ?? current_item.link ?? current_item.title;
            }
            if (current_feed != null) {
                current_feed.add_item(current_item);
            }
        }

        in_entry = false;
        // The item is finished; clear it so that later feed-level elements
        // cannot modify an item that was already handed to the feed.
        current_item = null;
    }

    /** Stores the title on the entry being built, or else on the feed. */
    private void handle_title(Xml.Node* node) {
        string? text = read_text(node);
        if (text == null) {
            return;
        }
        if (in_entry && current_item != null) {
            current_item.title = text;
        } else if (in_feed && current_feed != null) {
            current_feed.title = text;
        }
    }

    /** Stores the feed subtitle. */
    private void handle_subtitle(Xml.Node* node) {
        string? text = read_text(node);
        if (current_feed != null && text != null) {
            current_feed.subtitle = text;
        }
    }

    /**
     * Handles a link element.
     *
     * Entries are nested inside the feed, so in_feed is still true while
     * an entry is being processed. The entry case therefore has to be
     * checked first; otherwise entry links and enclosures would be
     * treated as feed links.
     */
    private void handle_link(Xml.Node* node) {
        string? href = node->get_prop("href");
        if (href == null) {
            return;
        }

        string? rel = node->get_prop("rel");
        // A missing rel attribute is treated the same as rel="alternate".
        bool is_alternate = (rel == null || rel == "alternate");

        if (in_entry) {
            if (current_item == null) {
                return;
            }
            if (is_alternate) {
                // Only the first alternate link is kept.
                if (current_item.link == null) {
                    current_item.link = href;
                }
            } else if (rel == "enclosure") {
                var type = node->get_prop("type");
                var length = node->get_prop("length");
                current_item.enclosure_url = href;
                current_item.enclosure_type = type;
                current_item.enclosure_length = length;
            }
        } else if (in_feed && current_feed != null && is_alternate) {
            if (current_feed.link == null) {
                current_feed.link = href;
            }
        }
    }

    /** Uses the entry summary as the description unless one is already set. */
    private void handle_summary(Xml.Node* node) {
        string? text = read_text(node);
        if (!in_entry || current_item == null || text == null) {
            return;
        }
        if (current_item.description == null) {
            current_item.description = text;
        }
    }

    /**
     * Stores the entry content, and also uses it as the description when
     * no description has been set yet.
     */
    private void handle_content(Xml.Node* node) {
        string? text = read_text(node);
        if (!in_entry || current_item == null || text == null) {
            return;
        }
        if (current_item.content == null) {
            current_item.content = text;
        }
        if (current_item.description == null) {
            current_item.description = text;
        }
    }

    /** Stores the entry id as the item's guid; the feed-level id is ignored. */
    private void handle_id(Xml.Node* node) {
        string? text = read_text(node);
        if (in_entry && current_item != null && current_item.guid == null && text != null) {
            current_item.guid = text;
        }
    }

    /**
     * Stores the updated timestamp text on the entry or on the feed.
     * The entry case is checked first because in_feed remains true
     * while an entry is being processed.
     */
    private void handle_updated(Xml.Node* node) {
        string? text = read_text(node);
        if (text == null) {
            return;
        }
        if (in_entry) {
            if (current_item != null) {
                current_item.updated = text;
            }
        } else if (in_feed && current_feed != null) {
            current_feed.updated = text;
        }
    }

    /** Stores the published timestamp text on the entry being built. */
    private void handle_published(Xml.Node* node) {
        string? text = read_text(node);
        if (in_entry && current_item != null && text != null) {
            current_item.published = text;
        }
    }

    /**
     * Stores the first author name found in an entry's author element.
     * Feed-level authors are ignored.
     */
    private void handle_author(Xml.Node* node) {
        if (!in_entry || current_item == null) {
            return;
        }
        for (Xml.Node* child = node->first_element_child();
             child != null;
             child = child->next_element_sibling()) {
            string? child_name = child->name;
            if (child_name != "name") {
                continue;
            }
            string? text = read_text(child);
            if (text != null && current_item.author == null) {
                current_item.author = text;
            }
        }
    }

    /** Stores the feed generator text. */
    private void handle_generator(Xml.Node* node) {
        string? text = read_text(node);
        if (current_feed != null && text != null) {
            current_feed.generator = text;
        }
    }

    /**
     * Appends the category term to the entry being built.
     * Categories outside an entry are ignored so that they are never
     * attached to an unrelated item.
     */
    private void handle_category(Xml.Node* node) {
        string? term = node->get_prop("term");
        if (!in_entry || current_item == null || term == null) {
            return;
        }
        current_categories += term;
        current_item.categories = current_categories;
    }
}