package com.rssuper.parsing

import com.rssuper.models.Enclosure
import com.rssuper.models.Feed
import com.rssuper.models.FeedItem
import org.xmlpull.v1.XmlPullParser
import org.xmlpull.v1.XmlPullParserFactory
import java.io.StringReader
import java.util.Date

/**
 * Pull-parser based reader that turns an RSS document into a [Feed].
 *
 * Elements are matched by local name (the parser is namespace-aware). A handful of
 * iTunes (`itunes:subtitle`, `itunes:summary`, `itunes:duration`, `itunes:image`) and
 * content-module (`content:encoded`) extensions are recognised by namespace URI.
 */
object RSSParser {
    private const val ITUNES_NS = "http://www.itunes.com/dtds/podcast-1.0.dtd"
    private const val CONTENT_NS = "http://purl.org/rss/1.0/modules/content/"

    /** Elements whose text content is captured through the TEXT event that follows them. */
    private val TEXT_TAGS = setOf(
        "title", "description", "link", "author", "guid", "pubDate", "category",
        "language", "lastBuildDate", "generator", "ttl",
    )

    /**
     * Elements that carry their own `title` / `link` / `description` children.
     * Their subtree is skipped so those children cannot overwrite the values of the
     * enclosing channel or item.
     */
    private val NESTED_CONTAINERS = setOf("image", "textInput", "textinput")

    /**
     * Parses [xml] as an RSS document.
     *
     * Channel-level values are taken from elements seen outside any `item`; each `item`
     * element produces one [FeedItem]. A missing channel title falls back to
     * `"Untitled Feed"`. Exceptions raised by the underlying pull parser are not caught
     * here and propagate to the caller.
     *
     * @param xml the raw feed document.
     * @param feedUrl the URL the document was fetched from; stored as `rawUrl` on the result.
     * @return the parsed feed, stamped with a freshly generated id and the current time
     *   as `lastFetchedAt`.
     */
    fun parse(xml: String, feedUrl: String): Feed {
        val factory = XmlPullParserFactory.newInstance()
        factory.isNamespaceAware = true
        val parser = factory.newPullParser()
        parser.setInput(StringReader(xml))

        var title: String? = null
        var link: String? = null
        var description: String? = null
        var language: String? = null
        var lastBuildDate: Date? = null
        var generator: String? = null
        var ttl: Int? = null

        val items = mutableListOf<FeedItem>()
        var currentItem: MutableMap<String, Any>? = null
        var currentTag: String? = null
        // > 0 while inside an <image>/<textInput> subtree whose content must be ignored.
        var skipDepth = 0

        var eventType = parser.eventType
        while (eventType != XmlPullParser.END_DOCUMENT) {
            when (eventType) {
                XmlPullParser.START_TAG -> {
                    val tagName = parser.name
                    val namespace = parser.namespace
                    when {
                        skipDepth > 0 -> skipDepth++
                        // itunes:image is an attribute-only element handled further down.
                        tagName in NESTED_CONTAINERS && namespace != ITUNES_NS -> {
                            currentTag = null
                            skipDepth = 1
                        }
                        tagName == "item" -> currentItem = mutableMapOf()
                        tagName == "enclosure" -> {
                            currentTag = null
                            val item = currentItem
                            if (item != null) {
                                readEnclosure(parser)?.let { item["enclosure"] = it }
                            }
                        }
                        tagName in TEXT_TAGS -> currentTag = tagName
                        tagName == "subtitle" && namespace == ITUNES_NS -> {
                            if (currentItem == null) {
                                val subtitle = readElementText(parser)
                                // An empty subtitle must not erase an existing description.
                                if (subtitle.isNotEmpty()) description = subtitle
                            }
                        }
                        tagName == "summary" && namespace == ITUNES_NS ->
                            currentItem?.putText("description", readElementText(parser))
                        // "duration" and "image" are collected but not read by buildFeedItem.
                        tagName == "duration" && namespace == ITUNES_NS ->
                            currentItem?.putText("duration", readElementText(parser))
                        tagName == "image" && namespace == ITUNES_NS -> {
                            val href = parser.getAttributeValue(null, "href")
                            val item = currentItem
                            if (href != null && item != null) item["image"] = href
                        }
                        tagName == "encoded" && namespace == CONTENT_NS ->
                            currentItem?.putText("content", readElementText(parser))
                    }
                }

                XmlPullParser.TEXT -> {
                    val text = parser.text?.xmlTrimmed() ?: ""
                    if (skipDepth == 0 && text.isNotEmpty()) {
                        val item = currentItem
                        if (item != null) {
                            when (currentTag) {
                                "title" -> item["title"] = text
                                "description" -> item["description"] = text
                                "link" -> item["link"] = text
                                "author" -> item["author"] = text
                                "guid" -> item["guid"] = text
                                "pubDate" -> item["pubDate"] = text
                                "category" -> {
                                    // Only this function writes "categories", always as a list of strings.
                                    @Suppress("UNCHECKED_CAST")
                                    val categories = item.getOrPut("categories") {
                                        mutableListOf<String>()
                                    } as MutableList<String>
                                    categories.add(text)
                                }
                            }
                        } else {
                            when (currentTag) {
                                "title" -> title = text
                                "link" -> link = text
                                "description" -> description = text
                                "language" -> language = text
                                "lastBuildDate" -> lastBuildDate = XmlDateParser.parse(text)
                                "generator" -> generator = text
                                "ttl" -> ttl = text.toIntOrNull()
                            }
                        }
                    }
                }

                XmlPullParser.END_TAG -> {
                    if (skipDepth > 0) {
                        skipDepth--
                    } else if (parser.name == "item") {
                        val item = currentItem
                        if (item != null) {
                            items.add(buildFeedItem(item))
                            currentItem = null
                        }
                    }
                    currentTag = null
                }
            }
            // readElementText may already have advanced to END_DOCUMENT; do not read past it.
            eventType = if (parser.eventType == XmlPullParser.END_DOCUMENT) {
                XmlPullParser.END_DOCUMENT
            } else {
                parser.next()
            }
        }

        return Feed(
            id = generateUuid(),
            title = title ?: "Untitled Feed",
            link = link,
            description = description,
            language = language,
            lastBuildDate = lastBuildDate,
            generator = generator,
            ttl = ttl,
            items = items,
            rawUrl = feedUrl,
            lastFetchedAt = Date()
        )
    }

    /** Stores [value] under [key] unless it is empty, so blank elements never clobber real data. */
    private fun MutableMap<String, Any>.putText(key: String, value: String) {
        if (value.isNotEmpty()) this[key] = value
    }

    /**
     * Builds an [Enclosure] from the attributes of the element the parser is positioned on.
     *
     * @return the enclosure, or `null` when either the `url` or the `type` attribute is missing.
     *   A missing or non-numeric `length` is passed through as `null`.
     */
    private fun readEnclosure(parser: XmlPullParser): Enclosure? {
        val url = parser.getAttributeValue(null, "url") ?: return null
        val type = parser.getAttributeValue(null, "type") ?: return null
        val length = parser.getAttributeValue(null, "length")?.toLongOrNull()
        return Enclosure(url, type, length)
    }

    /**
     * Collects the text content of the element whose START_TAG the parser is positioned on.
     *
     * Text of nested child elements is included. On return the parser sits on the matching
     * END_TAG, or on END_DOCUMENT if the document ended first.
     *
     * @return the concatenated text, passed through `xmlDecoded()` per segment and
     *   `xmlTrimmed()` as a whole; empty when the element has no text.
     */
    private fun readElementText(parser: XmlPullParser): String {
        val collected = StringBuilder()
        var depth = 1
        while (depth > 0) {
            when (parser.next()) {
                XmlPullParser.START_TAG -> depth++
                XmlPullParser.END_TAG -> depth--
                XmlPullParser.TEXT -> parser.text?.let { collected.append(it.xmlDecoded()) }
                XmlPullParser.END_DOCUMENT -> return collected.toString().xmlTrimmed()
            }
        }
        return collected.toString().xmlTrimmed()
    }

    /**
     * Converts the raw values gathered for one `item` element into a [FeedItem].
     *
     * Fallbacks: title defaults to `"Untitled"`, content defaults to the description,
     * and guid defaults to the link or, failing that, a newly generated id. The parsed
     * `pubDate` is used for both `published` and `updated`.
     *
     * @param item values keyed by the names written in [parse].
     */
    @Suppress("UNCHECKED_CAST")
    private fun buildFeedItem(item: Map<String, Any>): FeedItem {
        val title = item["title"] as? String ?: "Untitled"
        val link = item["link"] as? String
        val description = item["description"] as? String
        val content = item["content"] as? String ?: description
        val author = item["author"] as? String
        val guid = item["guid"] as? String ?: link ?: generateUuid()
        val categories = item["categories"] as? List<String>
        val enclosure = item["enclosure"] as? Enclosure
        val pubDateStr = item["pubDate"] as? String
        val published = XmlDateParser.parse(pubDateStr)

        return FeedItem(
            id = generateUuid(),
            title = title,
            link = link,
            description = description,
            content = content,
            author = author,
            published = published,
            updated = published,
            categories = categories,
            enclosure = enclosure,
            guid = guid
        )
    }
}