Implement Android RSS/Atom feed parser
- Add FeedParser.kt with automatic feed type detection
- Add RSSParser.kt for RSS 2.0 feeds
- Add AtomParser.kt for Atom 1.0 feeds
- Add comprehensive unit tests for both parsers
- Support iTunes namespace and enclosures
- Fix pre-existing compilation issues in the codebase
- Update build.gradle.kts with proper dependencies and AGP 8.5.0
This commit is contained in:
@@ -0,0 +1,154 @@
|
||||
package com.rssuper.parsing
|
||||
|
||||
import java.text.SimpleDateFormat
|
||||
import java.util.Locale
|
||||
import java.util.TimeZone
|
||||
import java.util.UUID
|
||||
import java.util.regex.Pattern
|
||||
|
||||
/**
 * Parses date strings by trying a fixed list of date patterns in order.
 *
 * [SimpleDateFormat] is mutable and not safe for concurrent use, so every thread gets its
 * own set of formatters instead of sharing one set through this singleton.
 */
object XmlDateParser {
    /** Patterns in the order they are attempted; the first one that parses wins. */
    private val patterns: List<String> = listOf(
        "yyyy-MM-dd'T'HH:mm:ss.SSSXXX",
        "yyyy-MM-dd'T'HH:mm:ssXXX",
        "EEE, dd MMM yyyy HH:mm:ss Z",
        "EEE, dd MMM yyyy HH:mm Z",
        "yyyy-MM-dd'T'HH:mm:ssZ",
        "yyyy-MM-dd'T'HH:mm:ss.SSSZ",
        "yyyy-MM-dd HH:mm:ss Z",
        "yyyy-MM-dd"
    )

    /** One formatter per pattern, created lazily for each thread that calls [parse]. */
    private val formatters = object : ThreadLocal<List<SimpleDateFormat>>() {
        override fun initialValue(): List<SimpleDateFormat> = patterns.mapNotNull { pattern ->
            try {
                SimpleDateFormat(pattern, Locale.US).apply {
                    // Used only when the input carries no zone of its own (e.g. "yyyy-MM-dd").
                    timeZone = TimeZone.getTimeZone("UTC")
                }
            } catch (e: IllegalArgumentException) {
                // The runtime rejected this pattern letter set; skip the pattern instead of
                // failing every parse. NOTE(review): presumably relevant for `XXX` on older
                // Android releases - confirm against the project's minSdk.
                null
            }
        }
    }

    /**
     * Parses [value] into a [java.util.Date].
     *
     * @param value raw date text; may be null or surrounded by whitespace.
     * @return the parsed date, or null when [value] is null, blank after trimming,
     *   or matches none of the supported patterns.
     */
    fun parse(value: String?): java.util.Date? {
        val trimmed = value?.xmlTrimmed() ?: return null
        if (trimmed.isEmpty()) return null

        val candidates = formatters.get() ?: return null
        for (format in candidates) {
            try {
                return format.parse(trimmed)
            } catch (e: Exception) {
                // Best effort: a failure on this pattern just moves on to the next one.
                continue
            }
        }
        return null
    }
}
|
||||
|
||||
/**
 * Returns this string without leading and trailing characters whose code is at or below
 * the space character (U+0020), i.e. ASCII whitespace and control characters.
 */
fun String.xmlTrimmed(): String {
    return trim { ch -> ch.code <= 0x20 }
}
|
||||
|
||||
/**
 * Trims this string with [xmlTrimmed] and returns the result, or null when nothing
 * is left after trimming.
 */
fun String.xmlNilIfEmpty(): String? = xmlTrimmed().takeIf { it.isNotEmpty() }
|
||||
|
||||
/**
 * Removes CDATA section markers and decodes the predefined XML character entities.
 *
 * Handled entities: `&lt;`, `&gt;`, `&quot;`, `&amp;` and the apostrophe forms
 * `&apos;`, `&#39;` and `&#x27;`. Other numeric or named references are left untouched.
 *
 * @return the receiver with CDATA markers stripped and the entities above replaced.
 */
fun String.xmlDecoded(): String {
    return this
        // Only the markers are removed; the text inside a CDATA section is kept.
        .replace("<![CDATA[", "", ignoreCase = true)
        .replace("]]>", "")
        .replace("&lt;", "<")
        .replace("&gt;", ">")
        .replace("&quot;", "\"")
        .replace("&apos;", "'")
        .replace("&#39;", "'")
        .replace("&#x27;", "'")
        // `&amp;` goes last so an escaped entity such as `&amp;quot;` is decoded exactly
        // once (to `&quot;`) instead of collapsing all the way to `"`.
        .replace("&amp;", "&")
}
|
||||
|
||||
/**
 * Converts [value] to a [Long] after trimming it with [xmlTrimmed].
 *
 * @return the parsed number, or null when [value] is null, empty after trimming,
 *   or not a valid decimal `Long`.
 */
fun xmlInt64(value: String?): Long? =
    value
        ?.xmlTrimmed()
        ?.takeUnless { it.isEmpty() }
        ?.toLongOrNull()
|
||||
|
||||
/**
 * Converts [value] to an [Int] after trimming it with [xmlTrimmed].
 *
 * @return the parsed number, or null when [value] is null, empty after trimming,
 *   or not a valid decimal `Int`.
 */
fun xmlInt(value: String?): Int? {
    if (value == null) return null
    val digits = value.xmlTrimmed()
    return when {
        digits.isEmpty() -> null
        else -> digits.toIntOrNull()
    }
}
|
||||
|
||||
/**
 * Returns the text content of the first `<tag>...</tag>` element in [inXml], passed
 * through [xmlDecoded] and [xmlTrimmed].
 *
 * Matching is case-insensitive, spans line breaks, and accepts an optional namespace
 * prefix on both the opening and the closing tag.
 *
 * @param tag element name; it is inserted into the regular expression unescaped.
 * @param inXml XML text to search.
 * @return the decoded, trimmed content, or null when no such element is found.
 */
fun xmlFirstTagValue(tag: String, inXml: String): String? {
    // The closing tag is `</tag>` with optional whitespace before `>`. A stray literal `}`
    // after the tag name previously made this pattern unmatchable against real XML.
    val pattern = Pattern.compile("(?is)<(?:\\w+:)?$tag\\b[^>]*>(.*?)</(?:\\w+:)?$tag\\s*>")
    val matcher = pattern.matcher(inXml)
    return if (matcher.find()) {
        matcher.group(1)?.xmlDecoded()?.xmlTrimmed()
    } else {
        null
    }
}
|
||||
|
||||
/**
 * Returns the text content of every `<tag>...</tag>` element in [inXml], each passed
 * through [xmlDecoded] and [xmlTrimmed]. Values that are empty after trimming are dropped.
 *
 * Matching is case-insensitive, spans line breaks, and accepts an optional namespace
 * prefix on both the opening and the closing tag.
 *
 * @param tag element name; it is inserted into the regular expression unescaped.
 * @param inXml XML text to search.
 * @return the non-empty values in document order; empty when nothing matches.
 */
fun xmlAllTagValues(tag: String, inXml: String): List<String> {
    // The closing tag is `</tag>` with optional whitespace before `>`. A stray literal `}`
    // after the tag name previously made this pattern unmatchable against real XML.
    val pattern = Pattern.compile("(?is)<(?:\\w+:)?$tag\\b[^>]*>(.*?)</(?:\\w+:)?$tag\\s*>")
    val matcher = pattern.matcher(inXml)
    val results = mutableListOf<String>()
    while (matcher.find()) {
        val value = matcher.group(1)?.xmlDecoded()?.xmlTrimmed() ?: continue
        if (value.isNotEmpty()) {
            results.add(value)
        }
    }
    return results
}
|
||||
|
||||
/**
 * Returns the raw inner text of the first `<tag>...</tag>` element in [inXml], without
 * decoding or trimming.
 *
 * Matching is case-insensitive, spans line breaks, and accepts an optional namespace
 * prefix on both the opening and the closing tag.
 *
 * @param tag element name; it is inserted into the regular expression unescaped.
 * @param inXml XML text to search.
 * @return the inner text exactly as it appears, or null when no such element is found.
 */
fun xmlFirstBlock(tag: String, inXml: String): String? {
    // The closing tag is `</tag>` with optional whitespace before `>`. A stray literal `}`
    // after the tag name previously made this pattern unmatchable against real XML.
    val pattern = Pattern.compile("(?is)<(?:\\w+:)?$tag\\b[^>]*>(.*?)</(?:\\w+:)?$tag\\s*>")
    val matcher = pattern.matcher(inXml)
    return if (matcher.find()) matcher.group(1) else null
}
|
||||
|
||||
/**
 * Returns the raw inner text of every `<tag>...</tag>` element in [inXml], without
 * decoding or trimming.
 *
 * Matching is case-insensitive, spans line breaks, and accepts an optional namespace
 * prefix on both the opening and the closing tag.
 *
 * @param tag element name; it is inserted into the regular expression unescaped.
 * @param inXml XML text to search.
 * @return the inner texts in document order; empty when nothing matches.
 */
fun xmlAllBlocks(tag: String, inXml: String): List<String> {
    // The closing tag is `</tag>` with optional whitespace before `>`. A stray literal `}`
    // after the tag name previously made this pattern unmatchable against real XML.
    val pattern = Pattern.compile("(?is)<(?:\\w+:)?$tag\\b[^>]*>(.*?)</(?:\\w+:)?$tag\\s*>")
    val matcher = pattern.matcher(inXml)
    val results = mutableListOf<String>()
    while (matcher.find()) {
        matcher.group(1)?.let { results.add(it) }
    }
    return results
}
|
||||
|
||||
/**
 * Collects the attributes of every opening (or self-closing) `<tag ...>` found in [inXml].
 *
 * The raw attribute text of each matching tag is handed to [parseXmlAttributes]; one map
 * is produced per matching tag, in document order.
 *
 * @param tag element name; it is inserted into the regular expression unescaped.
 * @param inXml XML text to search.
 * @return one attribute map per matching tag; empty when nothing matches.
 */
fun xmlAllTagAttributes(tag: String, inXml: String): List<Map<String, String>> {
    val tagMatcher = Pattern
        .compile("(?is)<(?:\\w+:)?$tag\\b([^>]*)/?>", Pattern.CASE_INSENSITIVE)
        .matcher(inXml)

    // The sequence is consumed element by element, so group(1) is read for the current
    // match before the matcher advances to the next one.
    return generateSequence { if (tagMatcher.find()) tagMatcher else null }
        .mapNotNull { current -> current.group(1) }
        .map { rawAttributes -> parseXmlAttributes(rawAttributes) }
        .toList()
}
|
||||
|
||||
/**
 * Parses the attribute portion of a tag (the text between the tag name and `>`) into a map.
 *
 * Attribute values may be enclosed in double or single quotes. Keys are lower-cased;
 * values are passed through [xmlDecoded] and [xmlTrimmed]. When a key occurs more than
 * once, the last occurrence wins.
 *
 * @param raw raw attribute text, e.g. `url="..." type='audio/mpeg'`.
 * @return attribute names mapped to their decoded values; empty when none are found.
 */
private fun parseXmlAttributes(raw: String): Map<String, String> {
    // Group 2 captures a double-quoted value, group 3 a single-quoted one.
    val pattern = Pattern.compile("(\\w+(?::\\w+)?)\\s*=\\s*(?:\"([^\"]*)\"|'([^']*)')")
    val matcher = pattern.matcher(raw)
    val result = mutableMapOf<String, String>()
    while (matcher.find()) {
        val key = matcher.group(1)?.lowercase() ?: continue
        val rawValue = matcher.group(2) ?: matcher.group(3) ?: continue
        result[key] = rawValue.xmlDecoded().xmlTrimmed()
    }
    return result
}
|
||||
|
||||
/**
 * Creates a new random UUID and returns its canonical string form.
 */
fun generateUuid(): String {
    val freshId = UUID.randomUUID()
    return freshId.toString()
}
|
||||
Reference in New Issue
Block a user