Implement Android RSS/Atom feed parser

- Add FeedParser.kt with automatic feed type detection
- Add RSSParser.kt for RSS 2.0 feeds
- Add AtomParser.kt for Atom 1.0 feeds
- Add comprehensive unit tests for both parsers
- Support iTunes namespace and enclosures
- Fix pre-existing compilation issues in the codebase
- Update build.gradle.kts with proper dependencies and AGP 8.5.0
This commit is contained in:
2026-03-30 09:01:49 -04:00
parent ac5250b2af
commit d84b8ff4e8
19 changed files with 1555 additions and 18 deletions

View File

@@ -0,0 +1,188 @@
package com.rssuper.parsing
import com.rssuper.models.Enclosure
import com.rssuper.models.Feed
import com.rssuper.models.FeedItem
import org.xmlpull.v1.XmlPullParser
import org.xmlpull.v1.XmlPullParserFactory
import java.io.StringReader
import java.util.Date
object RSSParser {
private val ITUNES_NS = "http://www.itunes.com/dtds/podcast-1.0.dtd"
private val CONTENT_NS = "http://purl.org/rss/1.0/modules/content/"
fun parse(xml: String, feedUrl: String): Feed {
val factory = XmlPullParserFactory.newInstance()
factory.isNamespaceAware = true
val parser = factory.newPullParser()
parser.setInput(StringReader(xml))
var title: String? = null
var link: String? = null
var description: String? = null
var language: String? = null
var lastBuildDate: Date? = null
var generator: String? = null
var ttl: Int? = null
val items = mutableListOf<FeedItem>()
var currentItem: MutableMap<String, Any?>? = null
var currentTag: String? = null
var eventType = parser.eventType
while (eventType != XmlPullParser.END_DOCUMENT) {
when (eventType) {
XmlPullParser.START_TAG -> {
val tagName = parser.name
val namespace = parser.namespace
when {
tagName == "channel" -> {}
tagName == "item" -> {
currentItem = mutableMapOf()
}
tagName == "title" || tagName == "description" ||
tagName == "link" || tagName == "author" ||
tagName == "guid" || tagName == "pubDate" ||
tagName == "category" || tagName == "enclosure" -> {
currentTag = tagName
}
tagName == "language" -> currentTag = tagName
tagName == "lastBuildDate" -> currentTag = tagName
tagName == "generator" -> currentTag = tagName
tagName == "ttl" -> currentTag = tagName
tagName == "subtitle" && namespace == ITUNES_NS -> {
if (currentItem == null) {
description = readElementText(parser)
}
}
tagName == "summary" && namespace == ITUNES_NS -> {
currentItem?.put("description", readElementText(parser))
}
tagName == "duration" && namespace == ITUNES_NS -> {
currentItem?.put("duration", readElementText(parser))
}
tagName == "image" && namespace == ITUNES_NS -> {
val href = parser.getAttributeValue(null, "href")
if (href != null && currentItem != null) {
currentItem.put("image", href)
}
}
tagName == "encoded" && namespace == CONTENT_NS -> {
currentItem?.put("content", readElementText(parser))
}
else -> {}
}
if (tagName == "enclosure" && currentItem != null) {
val url = parser.getAttributeValue(null, "url")
val type = parser.getAttributeValue(null, "type")
val length = parser.getAttributeValue(null, "length")?.toLongOrNull()
if (url != null && type != null) {
currentItem["enclosure"] = Enclosure(url, type, length)
}
}
}
XmlPullParser.TEXT -> {
val text = parser.text?.xmlTrimmed() ?: ""
if (text.isNotEmpty()) {
if (currentItem != null) {
when (currentTag) {
"title" -> currentItem["title"] = text
"description" -> currentItem["description"] = text
"link" -> currentItem["link"] = text
"author" -> currentItem["author"] = text
"guid" -> currentItem["guid"] = text
"pubDate" -> currentItem["pubDate"] = text
"category" -> {
val cats = currentItem["categories"] as? MutableList<String> ?: mutableListOf()
cats.add(text)
currentItem["categories"] = cats
}
}
} else {
when (currentTag) {
"title" -> title = text
"link" -> link = text
"description" -> description = text
"language" -> language = text
"lastBuildDate" -> lastBuildDate = XmlDateParser.parse(text)
"generator" -> generator = text
"ttl" -> ttl = text.toIntOrNull()
}
}
}
}
XmlPullParser.END_TAG -> {
val tagName = parser.name
if (tagName == "item" && currentItem != null) {
items.add(buildFeedItem(currentItem))
currentItem = null
}
currentTag = null
}
}
eventType = parser.next()
}
return Feed(
id = generateUuid(),
title = title ?: "Untitled Feed",
link = link,
description = description,
language = language,
lastBuildDate = lastBuildDate,
generator = generator,
ttl = ttl,
items = items,
rawUrl = feedUrl,
lastFetchedAt = Date()
)
}
private fun readElementText(parser: XmlPullParser): String {
var text = ""
var eventType = parser.eventType
while (eventType != XmlPullParser.END_TAG) {
if (eventType == XmlPullParser.TEXT) {
text = parser.text.xmlDecoded()
}
eventType = parser.next()
}
return text.xmlTrimmed()
}
@Suppress("UNCHECKED_CAST")
private fun buildFeedItem(item: Map<String, Any?>): FeedItem {
val title = item["title"] as? String ?: "Untitled"
val link = item["link"] as? String
val description = item["description"] as? String
val content = item["content"] as? String ?: description
val author = item["author"] as? String
val guid = item["guid"] as? String ?: link ?: generateUuid()
val categories = item["categories"] as? List<String>
val enclosure = item["enclosure"] as? Enclosure
val pubDateStr = item["pubDate"] as? String
val published = XmlDateParser.parse(pubDateStr)
return FeedItem(
id = generateUuid(),
title = title,
link = link,
description = description,
content = content,
author = author,
published = published,
updated = published,
categories = categories,
enclosure = enclosure,
guid = guid
)
}
}