246 lines
8.7 KiB
Vala
246 lines
8.7 KiB
Vala
/*
 * AtomParser.vala
 *
 * Atom 1.0 feed parser
 */
|
|
|
|
/**
 * Parser for Atom 1.0 feeds.
 *
 * The document is loaded with libxml2 and walked element by element. The
 * root ``feed`` element produces a {@link Feed}; every ``entry`` child
 * produces a {@link FeedItem} that is handed to ``Feed.add_item``.
 *
 * Parsing state is kept in instance fields, so a single instance must not
 * run two parses at the same time.
 */
public class RSSuper.AtomParser : Object {
    /* Namespace URI a namespaced root element must carry to be accepted. */
    private const string ATOM_NAMESPACE = "http://www.w3.org/2005/Atom";

    /* URL passed to the most recent parse() call. */
    private string feed_url;
    /* Feed being built by the current parse, or null before a feed element is seen. */
    private Feed? current_feed;
    /* Entry being built while inside an entry element. */
    private FeedItem? current_item;
    /* Category terms collected so far for the current entry. */
    private string[] current_categories;
    /* True while the children of the feed element are being visited. */
    private bool in_feed;
    /* True while the children of an entry element are being visited. */
    private bool in_entry;

    public AtomParser() {}

    /**
     * Parses an Atom document held in memory.
     *
     * @param xml_content the XML text of the feed
     * @param url the URL the feed came from; stored as ``raw_url`` on the resulting feed
     * @return ``ParseResult.success`` wrapping the parsed feed, or
     *         ``ParseResult.error`` with a message when the text is not
     *         well-formed XML, has no root element, or is not an Atom feed
     */
    public ParseResult parse(string xml_content, string url) {
        // Start from a clean slate so a reused parser never sees state left
        // behind by an earlier document.
        this.feed_url = url;
        this.current_feed = null;
        this.current_item = null;
        this.current_categories = {};
        this.in_feed = false;
        this.in_entry = false;

        Xml.Doc* doc = Xml.Parser.parse_doc(xml_content);
        if (doc == null) {
            return ParseResult.error("Failed to parse XML document");
        }

        Xml.Node* root = doc->get_root_element();
        if (root == null) {
            delete doc;
            return ParseResult.error("No root element found");
        }

        string? name = root->name;
        if (name == null || name != "feed") {
            delete doc;
            return ParseResult.error("Not an Atom feed: root element is '%s'".printf(name ?? "unknown"));
        }

        // A root without namespace information is accepted; only an explicit
        // namespace that differs from the Atom one is rejected.
        Xml.Ns* ns = root->ns;
        if (ns != null && ns->href != null && ns->href != ATOM_NAMESPACE) {
            delete doc;
            return ParseResult.error("Not an Atom 1.0 feed");
        }

        parse_element(root);
        // All values taken from the tree were copied into strings, so the
        // document can be released before the result is built.
        delete doc;

        if (current_feed == null) {
            return ParseResult.error("No feed element found");
        }

        current_feed.raw_url = url;

        return ParseResult.success(current_feed);
    }

    /**
     * Returns the text content of //node// with surrounding whitespace
     * removed, or null when the node has no content.
     */
    private static string? read_text(Xml.Node* node) {
        string? text = node->get_content();
        if (text == null) {
            return null;
        }
        return text.strip();
    }

    /**
     * Handles one element, dispatching on its name.
     *
     * ``feed`` and ``entry`` recurse into their children; every other
     * recognised element stores its value on the current feed or entry.
     * Unrecognised elements are ignored.
     *
     * Entries are nested inside the feed, so ``in_feed`` is still true while
     * ``in_entry`` is set. Elements that may appear in both places must
     * therefore test ``in_entry`` first.
     */
    private void parse_element(Xml.Node* node) {
        string? name = node->name;
        if (name == null) {
            return;
        }

        switch (name) {
            case "feed":
                in_feed = true;
                current_feed = new Feed();
                current_categories = {};
                iterate_children(node);
                in_feed = false;
                break;

            case "entry":
                in_entry = true;
                current_item = new FeedItem();
                current_categories = {};
                iterate_children(node);
                // Entries whose title equals the empty string are dropped.
                if (current_item != null && current_item.title != "") {
                    if (current_item.id == "") {
                        // Fall back to the first available identifying value.
                        current_item.id = current_item.guid ?? current_item.link ?? current_item.title;
                    }
                    if (current_feed != null) {
                        current_feed.add_item(current_item);
                    }
                }
                in_entry = false;
                break;

            case "title":
                var text = read_text(node);
                if (text != null) {
                    if (in_entry && current_item != null) {
                        current_item.title = text;
                    } else if (in_feed && current_feed != null) {
                        current_feed.title = text;
                    }
                }
                break;

            case "subtitle":
                var text = read_text(node);
                if (current_feed != null && text != null) {
                    current_feed.subtitle = text;
                }
                break;

            case "link":
                var href = node->get_prop("href");
                var rel = node->get_prop("rel");
                if (href != null) {
                    // A link without a rel attribute is treated like rel="alternate".
                    bool is_alternate = (rel == null || rel == "alternate");

                    // Entry first: in_feed is also true here, and testing it
                    // first would attach entry links to the feed.
                    if (in_entry) {
                        if (current_item != null && is_alternate) {
                            // Keep the first alternate link only.
                            if (current_item.link == null) {
                                current_item.link = href;
                            }
                        } else if (rel == "enclosure") {
                            var type = node->get_prop("type");
                            var length = node->get_prop("length");
                            if (current_item != null) {
                                current_item.enclosure_url = href;
                                current_item.enclosure_type = type;
                                current_item.enclosure_length = length;
                            }
                        }
                    } else if (in_feed) {
                        if (current_feed != null && is_alternate) {
                            // Keep the first alternate link only.
                            if (current_feed.link == null) {
                                current_feed.link = href;
                            }
                        }
                    }
                }
                break;

            case "summary":
                var text = read_text(node);
                if (in_entry && current_item != null) {
                    if (current_item.description == null && text != null) {
                        current_item.description = text;
                    }
                }
                break;

            case "content":
                var text = read_text(node);
                if (in_entry && current_item != null && text != null) {
                    if (current_item.content == null) {
                        current_item.content = text;
                    }
                    // Content doubles as the description when no summary
                    // has been stored yet.
                    if (current_item.description == null) {
                        current_item.description = text;
                    }
                }
                break;

            case "id":
                var text = read_text(node);
                if (in_entry && current_item != null && current_item.guid == null && text != null) {
                    current_item.guid = text;
                }
                break;

            case "updated":
                var text = read_text(node);
                if (text != null) {
                    // Entry first, for the same reason as in the "link" case.
                    if (in_entry && current_item != null) {
                        current_item.updated = text;
                    } else if (in_feed && current_feed != null) {
                        current_feed.updated = text;
                    }
                }
                break;

            case "published":
                var text = read_text(node);
                if (in_entry && current_item != null && text != null) {
                    current_item.published = text;
                }
                break;

            case "author":
                if (in_entry && current_item != null) {
                    // Only the <name> child is used; the first one found wins.
                    Xml.Node* child = node->first_element_child();
                    while (child != null) {
                        string? child_name = child->name;
                        if (child_name == "name") {
                            var text = read_text(child);
                            if (text != null && current_item.author == null) {
                                current_item.author = text;
                            }
                        }
                        child = child->next_element_sibling();
                    }
                }
                break;

            case "generator":
                var text = read_text(node);
                if (current_feed != null && text != null) {
                    current_feed.generator = text;
                }
                break;

            case "category":
                var term = node->get_prop("term");
                // Only categories inside an entry belong to an item; without
                // the in_entry test a feed-level category that follows an
                // entry would be appended to that entry.
                if (in_entry && current_item != null && term != null) {
                    current_categories += term;
                    current_item.categories = current_categories;
                }
                break;

            default:
                // Elements this parser does not know about are skipped.
                break;
        }
    }

    /**
     * Calls parse_element() on every direct element child of //node//, in
     * document order.
     */
    private void iterate_children(Xml.Node* node) {
        for (Xml.Node* child = node->first_element_child();
             child != null;
             child = child->next_element_sibling()) {
            parse_element(child);
        }
    }
}
|