Files
RSSuper/native-route/android/src/main/java/com/rssuper/parsing/XmlParsingUtilities.kt
Michael Freno d84b8ff4e8 Implement Android RSS/Atom feed parser
- Add FeedParser.kt with automatic feed type detection
- Add RSSParser.kt for RSS 2.0 feeds
- Add AtomParser.kt for Atom 1.0 feeds
- Add comprehensive unit tests for both parsers
- Support iTunes namespace and enclosures
- Fix pre-existing compilation issues in the codebase
- Update build.gradle.kts with proper dependencies and AGP 8.5.0
2026-03-30 09:01:49 -04:00

155 lines
5.1 KiB
Kotlin

package com.rssuper.parsing
import java.text.SimpleDateFormat
import java.util.Locale
import java.util.TimeZone
import java.util.UUID
import java.util.regex.Pattern
object XmlDateParser {
private val iso8601WithFractional: SimpleDateFormat by lazy {
SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSXXX", Locale.US).apply {
timeZone = TimeZone.getTimeZone("UTC")
}
}
private val iso8601: SimpleDateFormat by lazy {
SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssXXX", Locale.US).apply {
timeZone = TimeZone.getTimeZone("UTC")
}
}
private val dateFormats: List<SimpleDateFormat> by lazy {
listOf(
SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss Z", Locale.US),
SimpleDateFormat("EEE, dd MMM yyyy HH:mm Z", Locale.US),
SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US),
SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSZ", Locale.US),
SimpleDateFormat("yyyy-MM-dd HH:mm:ss Z", Locale.US),
SimpleDateFormat("yyyy-MM-dd", Locale.US)
).map {
SimpleDateFormat(it.toPattern(), Locale.US).apply {
timeZone = TimeZone.getTimeZone("UTC")
}
}
}
fun parse(value: String?): java.util.Date? {
val trimmed = value?.xmlTrimmed() ?: return null
if (trimmed.isEmpty()) return null
return try {
iso8601WithFractional.parse(trimmed)
} catch (e: Exception) {
try {
iso8601.parse(trimmed)
} catch (e: Exception) {
for (format in dateFormats) {
try {
return format.parse(trimmed)
} catch (e: Exception) {
continue
}
}
null
}
}
}
}
fun String.xmlTrimmed(): String = this.trim { it <= ' ' }
fun String.xmlNilIfEmpty(): String? {
val trimmed = this.xmlTrimmed()
return if (trimmed.isEmpty()) null else trimmed
}
fun String.xmlDecoded(): String {
return this
.replace(Regex("<!\\[CDATA\\[", RegexOption.IGNORE_CASE), "")
.replace(Regex("\\]\\]>", RegexOption.IGNORE_CASE), "")
.replace("&lt;", "<")
.replace("&gt;", ">")
.replace("&amp;", "&")
.replace("&quot;", "\"")
.replace("&apos;", "'")
.replace("&#39;", "'")
.replace("&#x27;", "'")
}
fun xmlInt64(value: String?): Long? {
val trimmed = value?.xmlTrimmed() ?: return null
if (trimmed.isEmpty()) return null
return trimmed.toLongOrNull()
}
fun xmlInt(value: String?): Int? {
val trimmed = value?.xmlTrimmed() ?: return null
if (trimmed.isEmpty()) return null
return trimmed.toIntOrNull()
}
fun xmlFirstTagValue(tag: String, inXml: String): String? {
val pattern = Pattern.compile("(?is)<(?:\\w+:)?$tag\\b[^>]*>(.*?)</(?:\\w+:)?$tag}>", Pattern.CASE_INSENSITIVE)
val matcher = pattern.matcher(inXml)
return if (matcher.find()) {
matcher.group(1)?.xmlDecoded()?.xmlTrimmed()
} else {
null
}
}
fun xmlAllTagValues(tag: String, inXml: String): List<String> {
val pattern = Pattern.compile("(?is)<(?:\\w+:)?$tag\\b[^>]*>(.*?)</(?:\\w+:)?$tag}>", Pattern.CASE_INSENSITIVE)
val matcher = pattern.matcher(inXml)
val results = mutableListOf<String>()
while (matcher.find()) {
matcher.group(1)?.xmlDecoded()?.xmlTrimmed()?.let { value ->
if (value.isNotEmpty()) {
results.add(value)
}
}
}
return results
}
fun xmlFirstBlock(tag: String, inXml: String): String? {
val pattern = Pattern.compile("(?is)<(?:\\w+:)?$tag\\b[^>]*>(.*?)</(?:\\w+:)?$tag}>", Pattern.CASE_INSENSITIVE)
val matcher = pattern.matcher(inXml)
return if (matcher.find()) matcher.group(1) else null
}
fun xmlAllBlocks(tag: String, inXml: String): List<String> {
val pattern = Pattern.compile("(?is)<(?:\\w+:)?$tag\\b[^>]*>(.*?)</(?:\\w+:)?$tag}>", Pattern.CASE_INSENSITIVE)
val matcher = pattern.matcher(inXml)
val results = mutableListOf<String>()
while (matcher.find()) {
matcher.group(1)?.let { results.add(it) }
}
return results
}
fun xmlAllTagAttributes(tag: String, inXml: String): List<Map<String, String>> {
val pattern = Pattern.compile("(?is)<(?:\\w+:)?$tag\\b([^>]*)/?>", Pattern.CASE_INSENSITIVE)
val matcher = pattern.matcher(inXml)
val results = mutableListOf<Map<String, String>>()
while (matcher.find()) {
matcher.group(1)?.let { results.add(parseXmlAttributes(it)) }
}
return results
}
private fun parseXmlAttributes(raw: String): Map<String, String> {
val pattern = Pattern.compile("(\\w+(?::\\w+)?)\\s*=\\s*\"([^\"]*)\"")
val matcher = pattern.matcher(raw)
val result = mutableMapOf<String, String>()
while (matcher.find()) {
val key = matcher.group(1)?.lowercase() ?: continue
val value = matcher.group(2)?.xmlDecoded()?.xmlTrimmed() ?: continue
result[key] = value
}
return result
}
fun generateUuid(): String = UUID.randomUUID().toString()