- Add FeedParser.kt with automatic feed type detection
- Add RSSParser.kt for RSS 2.0 feeds
- Add AtomParser.kt for Atom 1.0 feeds
- Add comprehensive unit tests for both parsers
- Support iTunes namespace and enclosures
- Fix pre-existing compilation issues in the codebase
- Update build.gradle.kts with proper dependencies and AGP 8.5.0
241 lines
10 KiB
Kotlin
241 lines
10 KiB
Kotlin
package com.rssuper.parsing
|
|
|
|
import com.rssuper.models.Enclosure
|
|
import com.rssuper.models.Feed
|
|
import com.rssuper.models.FeedItem
|
|
import org.xmlpull.v1.XmlPullParser
|
|
import org.xmlpull.v1.XmlPullParserFactory
|
|
import java.io.StringReader
|
|
|
|
/**
 * Pull-parser based reader for Atom feeds.
 *
 * The single entry point is [parse]; it walks the document once with a namespace-aware
 * [XmlPullParser] and produces a [Feed] with one [FeedItem] per `<entry>`.
 *
 * Element dispatch is namespace-aware:
 * - iTunes and Media RSS extension elements are handled first, so that e.g. `itunes:summary`
 *   is never mistaken for the Atom `summary` element;
 * - Atom elements are accepted when they are in the Atom namespace or in no namespace at all
 *   (the latter keeps accepting feeds that omit the `xmlns` declaration);
 * - elements from any other namespace are ignored.
 */
object AtomParser {

    private const val ATOM_NS = "http://www.w3.org/2005/Atom"
    private const val ITUNES_NS = "http://www.itunes.com/dtds/podcast-1.0.dtd"
    private const val MEDIA_NS = "http://search.yahoo.com/mrss/"

    /**
     * Mutable accumulator for the fields of one `<entry>` while it is being parsed.
     *
     * Date fields are kept as raw strings and converted in [buildFeedItem].
     */
    private class EntryDraft {
        var title: String? = null
        var link: String? = null
        var summary: String? = null
        var content: String? = null
        var itunesSummary: String? = null
        var author: String? = null
        var guid: String? = null
        var updated: String? = null
        var published: String? = null
        var enclosure: Enclosure? = null
        val categories = mutableListOf<String>()

        // Collected from extension elements but not passed to the FeedItem constructor
        // in buildFeedItem; kept so the values are available once FeedItem can carry them.
        var image: String? = null
        var duration: String? = null
        var mediaThumbnail: String? = null
    }

    /**
     * Parses an Atom document into a [Feed].
     *
     * @param xml the complete feed document as a string.
     * @param feedUrl the URL the document was fetched from; stored as `rawUrl` on the result.
     * @return the parsed feed. The title falls back to the feed `<id>` and then to
     *   `"Untitled Feed"` when no non-empty `<title>` is present.
     */
    fun parse(xml: String, feedUrl: String): Feed {
        val factory = XmlPullParserFactory.newInstance()
        factory.isNamespaceAware = true
        val parser = factory.newPullParser()
        parser.setInput(StringReader(xml))

        var title: String? = null
        var link: String? = null
        var subtitle: String? = null
        var updated: java.util.Date? = null
        var generator: String? = null
        val items = mutableListOf<FeedItem>()

        // Non-null while the parser is positioned inside an <entry>.
        var entry: EntryDraft? = null

        var eventType = parser.eventType
        while (eventType != XmlPullParser.END_DOCUMENT) {
            when (eventType) {
                XmlPullParser.START_TAG -> {
                    val tagName = parser.name
                    val namespace = parser.namespace
                    val draft = entry

                    when {
                        // Extension namespaces first: their local names (e.g. "summary")
                        // collide with Atom element names.
                        namespace == ITUNES_NS || namespace == MEDIA_NS -> {
                            if (draft != null) {
                                readExtensionElement(parser, tagName, namespace, draft)
                            }
                        }
                        !isAtomNamespace(namespace) -> {}
                        tagName == "entry" -> entry = EntryDraft()
                        draft != null -> readEntryElement(parser, tagName, draft)
                        else -> when (tagName) {
                            "link" -> {
                                val href = parser.getAttributeValue(null, "href")
                                val rel = parser.getAttributeValue(null, "rel")
                                if (href != null && (rel == null || rel == "alternate")) {
                                    link = href
                                }
                            }
                            "title" -> readPlainText(parser)?.let { title = it }
                            "subtitle" -> readPlainText(parser)?.let { subtitle = it }
                            // The feed id only serves as a title fallback; a later
                            // non-empty <title> still replaces it.
                            "id" -> readPlainText(parser)?.let { if (title == null) title = it }
                            "updated" -> readPlainText(parser)?.let { updated = XmlDateParser.parse(it) }
                            "generator" -> readPlainText(parser)?.let { generator = it }
                        }
                    }
                }

                XmlPullParser.END_TAG -> {
                    val draft = entry
                    if (draft != null && parser.name == "entry" && isAtomNamespace(parser.namespace)) {
                        items.add(buildFeedItem(draft))
                        entry = null
                    }
                }
            }

            // A text read on truncated input may already have reached END_DOCUMENT;
            // do not advance past it.
            eventType = if (parser.eventType == XmlPullParser.END_DOCUMENT) {
                XmlPullParser.END_DOCUMENT
            } else {
                parser.next()
            }
        }

        return Feed(
            id = generateUuid(),
            title = title ?: "Untitled Feed",
            link = link,
            subtitle = subtitle,
            description = subtitle,
            updated = updated,
            generator = generator,
            items = items,
            rawUrl = feedUrl,
            lastFetchedAt = java.util.Date()
        )
    }

    /** Returns true for the Atom namespace and for elements that carry no namespace. */
    private fun isAtomNamespace(namespace: String?): Boolean =
        namespace.isNullOrEmpty() || namespace == ATOM_NS

    /**
     * Handles the start tag of an Atom element found inside an `<entry>`.
     *
     * Text-bearing elements are consumed up to their end tag; unknown elements are left
     * untouched so that their children are still dispatched by the main loop.
     */
    private fun readEntryElement(parser: XmlPullParser, tagName: String, draft: EntryDraft) {
        when (tagName) {
            "link" -> readEntryLink(parser, draft)
            "category" -> parser.getAttributeValue(null, "term")?.let { draft.categories.add(it) }
            // <source> holds metadata (title, id, updated, ...) of the feed the entry was
            // copied from; skip the subtree so it cannot overwrite the entry's own fields.
            "source" -> readElementText(parser, decodeEntities = false)
            "title" -> readPlainText(parser)?.let { draft.title = it }
            "summary" -> readPlainText(parser)?.let { draft.summary = it }
            "content" -> readPlainText(parser)?.let { draft.content = it }
            "name" -> readPlainText(parser)?.let { draft.author = it }
            "id" -> readPlainText(parser)?.let { draft.guid = it }
            "updated" -> readPlainText(parser)?.let { draft.updated = it }
            "published" -> readPlainText(parser)?.let { draft.published = it }
        }
    }

    /**
     * Reads an entry-level `<link>`: `rel="alternate"` (or no `rel`) sets the entry link,
     * `rel="enclosure"` sets the enclosure. Links without `href` are ignored.
     */
    private fun readEntryLink(parser: XmlPullParser, draft: EntryDraft) {
        val href = parser.getAttributeValue(null, "href") ?: return
        val rel = parser.getAttributeValue(null, "rel")
        if (rel == null || rel == "alternate") {
            draft.link = href
        } else if (rel == "enclosure") {
            val type = parser.getAttributeValue(null, "type") ?: "application/octet-stream"
            val length = parser.getAttributeValue(null, "length")?.toLongOrNull()
            draft.enclosure = Enclosure(href, type, length)
        }
    }

    /** Handles the start tag of an iTunes or Media RSS element found inside an `<entry>`. */
    private fun readExtensionElement(
        parser: XmlPullParser,
        tagName: String,
        namespace: String,
        draft: EntryDraft
    ) {
        when (namespace) {
            ITUNES_NS -> when (tagName) {
                "summary" -> readElementText(parser).ifEmpty { null }?.let { draft.itunesSummary = it }
                "image" -> parser.getAttributeValue(null, "href")?.let { draft.image = it }
                "duration" -> draft.duration = readElementText(parser)
            }
            MEDIA_NS -> when (tagName) {
                "thumbnail" -> parser.getAttributeValue(null, "url")?.let { draft.mediaThumbnail = it }
                "enclosure" -> {
                    val url = parser.getAttributeValue(null, "url")
                    val type = parser.getAttributeValue(null, "type")
                    val length = parser.getAttributeValue(null, "length")?.toLongOrNull()
                    if (url != null && type != null) {
                        draft.enclosure = Enclosure(url, type, length)
                    }
                }
            }
        }
    }

    /**
     * Reads the current element's text without the extra `xmlDecoded()` pass and
     * returns null when the trimmed text is empty, so callers keep any earlier value.
     */
    private fun readPlainText(parser: XmlPullParser): String? =
        readElementText(parser, decodeEntities = false).ifEmpty { null }

    /**
     * Collects all character data of the current element, including text inside nested
     * child elements, and leaves the parser on the element's matching end tag.
     *
     * Must be called while the parser is positioned on the element's START_TAG. Reading
     * stops early if END_DOCUMENT is reached, so truncated input cannot loop forever.
     *
     * @param decodeEntities when true, the collected text is passed through `xmlDecoded()`
     *   before trimming.
     * @return the collected text after `xmlTrimmed()`; empty when the element has no text.
     */
    private fun readElementText(parser: XmlPullParser, decodeEntities: Boolean = true): String {
        val buffer = StringBuilder()
        // Number of elements still open, counting the one being read.
        var openElements = 1
        while (openElements > 0) {
            when (parser.next()) {
                XmlPullParser.START_TAG -> openElements++
                XmlPullParser.END_TAG -> openElements--
                XmlPullParser.TEXT -> parser.text?.let { buffer.append(it) }
                XmlPullParser.END_DOCUMENT -> {
                    openElements = 0
                }
            }
        }
        val raw = buffer.toString()
        return (if (decodeEntities) raw.xmlDecoded() else raw).xmlTrimmed()
    }

    /**
     * Converts a finished [EntryDraft] into a [FeedItem].
     *
     * Fallbacks: title defaults to `"Untitled"`; content falls back to the summary;
     * description falls back to the iTunes summary; the published date falls back to the
     * updated date; guid falls back to the link and then to a generated UUID.
     */
    private fun buildFeedItem(draft: EntryDraft): FeedItem {
        val published = XmlDateParser.parse(draft.published ?: draft.updated)
        val updated = XmlDateParser.parse(draft.updated)

        return FeedItem(
            id = generateUuid(),
            title = draft.title ?: "Untitled",
            link = draft.link,
            description = draft.summary ?: draft.itunesSummary,
            content = draft.content ?: draft.summary,
            author = draft.author,
            published = published,
            updated = updated,
            // Entries without any <category term="..."> yield null rather than an empty list.
            categories = draft.categories.takeIf { it.isNotEmpty() }?.toList(),
            enclosure = draft.enclosure,
            guid = draft.guid ?: draft.link ?: generateUuid()
        )
    }
}
|