- Add FeedParser.kt with automatic feed type detection - Add RSSParser.kt for RSS 2.0 feeds - Add AtomParser.kt for Atom 1.0 feeds - Add comprehensive unit tests for both parsers - Support iTunes namespace and enclosures - Fix pre-existing compilation issues in the codebase - Update build.gradle.kts with proper dependencies and AGP 8.5.0
155 lines
5.1 KiB
Kotlin
155 lines
5.1 KiB
Kotlin
package com.rssuper.parsing
|
|
|
|
import java.text.SimpleDateFormat
|
|
import java.util.Locale
|
|
import java.util.TimeZone
|
|
import java.util.UUID
|
|
import java.util.regex.Pattern
|
|
|
|
/**
 * Parses feed date strings written in ISO 8601-style or RFC 822-style layouts.
 *
 * Every format is configured with the UTC time zone, so values that carry no
 * zone of their own (for example a bare `yyyy-MM-dd`) are interpreted as UTC.
 */
object XmlDateParser {

    private val utc: TimeZone = TimeZone.getTimeZone("UTC")

    /** Patterns in the order they are attempted; the first one that parses wins. */
    private val patterns: List<String> = listOf(
        "yyyy-MM-dd'T'HH:mm:ss.SSSXXX",
        "yyyy-MM-dd'T'HH:mm:ssXXX",
        "EEE, dd MMM yyyy HH:mm:ss Z",
        "EEE, dd MMM yyyy HH:mm Z",
        "yyyy-MM-dd'T'HH:mm:ssZ",
        "yyyy-MM-dd'T'HH:mm:ss.SSSZ",
        "yyyy-MM-dd HH:mm:ss Z",
        "yyyy-MM-dd"
    )

    /** One reusable formatter per pattern, built on first use. */
    private val formats: List<SimpleDateFormat> by lazy {
        patterns.map { pattern ->
            SimpleDateFormat(pattern, Locale.US).apply { timeZone = utc }
        }
    }

    /**
     * Converts [value] to a date, trying each known pattern in order.
     *
     * The call is synchronized because [SimpleDateFormat] keeps mutable parse
     * state and the formatter instances are shared by every caller of this
     * singleton.
     *
     * @param value raw text of a date element; may be null or blank.
     * @return the parsed date, or null when [value] is null, blank after
     *   trimming, or matches none of the supported patterns.
     */
    @Synchronized
    fun parse(value: String?): java.util.Date? {
        val text = value?.xmlTrimmed()?.takeIf { it.isNotEmpty() } ?: return null
        // parse(text, ParsePosition) returns null on a mismatch instead of throwing,
        // so no exception handling is needed to fall through to the next pattern.
        return formats.firstNotNullOfOrNull { format ->
            format.parse(text, java.text.ParsePosition(0))
        }
    }
}
|
|
|
|
/**
 * Returns the receiver without leading and trailing characters whose code is
 * at or below U+0020 (the space character and the ASCII control characters).
 */
fun String.xmlTrimmed(): String {
    var start = 0
    var end = length
    while (start < end && this[start] <= ' ') start++
    while (end > start && this[end - 1] <= ' ') end--
    return substring(start, end)
}
|
|
|
|
/**
 * Trims the receiver with [xmlTrimmed] and returns the result, or null when
 * nothing is left after trimming.
 */
fun String.xmlNilIfEmpty(): String? = xmlTrimmed().takeIf { it.isNotEmpty() }
|
|
|
|
/** Opening marker of a CDATA section, matched case-insensitively. */
private val xmlCdataOpen = Regex("""<!\[CDATA\[""", RegexOption.IGNORE_CASE)

/** Closing marker of a CDATA section. */
private val xmlCdataClose = Regex("""\]\]>""", RegexOption.IGNORE_CASE)

/** A predefined XML entity or a decimal/hexadecimal character reference. */
private val xmlReference = Regex("""&(lt|gt|amp|quot|apos|#[0-9]+|#[xX][0-9A-Fa-f]+);""")

/**
 * Resolves the body of a numeric character reference (`#8217` or `#x2019`).
 *
 * @return the referenced character as a string, or null when the number does
 *   not fit an Int or is not a usable code point (NUL, surrogate, out of range).
 */
private fun decodeXmlCharacterReference(body: String): String? {
    val isHex = body.length > 1 && (body[1] == 'x' || body[1] == 'X')
    val codePoint = if (isHex) body.substring(2).toIntOrNull(16) else body.substring(1).toIntOrNull()
    return codePoint
        ?.takeIf { it != 0 && Character.isValidCodePoint(it) && it !in 0xD800..0xDFFF }
        ?.let { String(Character.toChars(it)) }
}

/**
 * Removes CDATA section markers and decodes XML references in the receiver.
 *
 * Handles the five predefined entities (`&lt;`, `&gt;`, `&amp;`, `&quot;`,
 * `&apos;`) and numeric character references such as `&#39;` or `&#x27;`.
 * Unknown or invalid references are left exactly as written.
 *
 * All references are decoded in a single pass, so text produced by decoding
 * is never decoded a second time: `&amp;quot;` becomes `&quot;`, not `"`.
 */
fun String.xmlDecoded(): String {
    val withoutCdata = replace(xmlCdataOpen, "").replace(xmlCdataClose, "")
    return xmlReference.replace(withoutCdata) { match ->
        when (val body = match.groupValues[1]) {
            "lt" -> "<"
            "gt" -> ">"
            "amp" -> "&"
            "quot" -> "\""
            "apos" -> "'"
            else -> decodeXmlCharacterReference(body) ?: match.value
        }
    }
}
|
|
|
|
/**
 * Reads [value] as a 64-bit integer after trimming it with [xmlTrimmed].
 *
 * @return the parsed number, or null when [value] is null, blank, or not a
 *   valid `Long`.
 */
fun xmlInt64(value: String?): Long? =
    // toLongOrNull already yields null for an empty string.
    value?.xmlTrimmed()?.toLongOrNull()
|
|
|
|
/**
 * Reads [value] as a 32-bit integer after trimming it with [xmlTrimmed].
 *
 * @return the parsed number, or null when [value] is null, blank, or not a
 *   valid `Int`.
 */
fun xmlInt(value: String?): Int? =
    // toIntOrNull already yields null for an empty string.
    value?.xmlTrimmed()?.toIntOrNull()
|
|
|
|
/**
 * Returns the decoded, trimmed text of the first `<tag>…</tag>` element in [inXml].
 *
 * Matching is case-insensitive and tolerates an optional namespace prefix on
 * both the opening and closing tag.
 *
 * @param tag element name, treated as literal text (not as a regex).
 * @param inXml XML text to search.
 * @return the element content passed through [xmlDecoded] and [xmlTrimmed],
 *   or null when no such element exists.
 */
fun xmlFirstTagValue(tag: String, inXml: String): String? {
    val name = Pattern.quote(tag)
    // (?=[\s/>]) ends the name exactly, so "content" does not match <content:encoded>.
    // (?<!/)> rejects self-closing tags, which have no content or closing tag to pair with.
    val element = Pattern.compile(
        """<(?:\w+:)?$name(?=[\s/>])[^>]*(?<!/)>(.*?)</(?:\w+:)?$name\s*>""",
        Pattern.CASE_INSENSITIVE or Pattern.DOTALL
    )
    val matcher = element.matcher(inXml)
    return if (matcher.find()) matcher.group(1)?.xmlDecoded()?.xmlTrimmed() else null
}
|
|
|
|
/**
 * Returns the decoded, trimmed text of every `<tag>…</tag>` element in [inXml],
 * in document order, skipping elements whose text is empty after trimming.
 *
 * Matching is case-insensitive and tolerates an optional namespace prefix on
 * both the opening and closing tag.
 *
 * @param tag element name, treated as literal text (not as a regex).
 * @param inXml XML text to search.
 * @return the non-empty element values; empty when nothing matches.
 */
fun xmlAllTagValues(tag: String, inXml: String): List<String> {
    val name = Pattern.quote(tag)
    // (?=[\s/>]) ends the name exactly, so "content" does not match <content:encoded>.
    // (?<!/)> rejects self-closing tags, which have no content or closing tag to pair with.
    val element = Pattern.compile(
        """<(?:\w+:)?$name(?=[\s/>])[^>]*(?<!/)>(.*?)</(?:\w+:)?$name\s*>""",
        Pattern.CASE_INSENSITIVE or Pattern.DOTALL
    )
    val matcher = element.matcher(inXml)
    return generateSequence { if (matcher.find()) matcher.group(1) else null }
        .map { it.xmlDecoded().xmlTrimmed() }
        .filter { it.isNotEmpty() }
        .toList()
}
|
|
|
|
/**
 * Returns the raw inner content of the first `<tag>…</tag>` element in [inXml].
 *
 * The content is returned untouched: no entity decoding and no trimming.
 * Matching is case-insensitive and tolerates an optional namespace prefix on
 * both the opening and closing tag.
 *
 * @param tag element name, treated as literal text (not as a regex).
 * @param inXml XML text to search.
 * @return the text between the opening and closing tag, or null when no such
 *   element exists.
 */
fun xmlFirstBlock(tag: String, inXml: String): String? {
    val name = Pattern.quote(tag)
    // (?=[\s/>]) ends the name exactly, so "content" does not match <content:encoded>.
    // (?<!/)> rejects self-closing tags, which have no content or closing tag to pair with.
    val element = Pattern.compile(
        """<(?:\w+:)?$name(?=[\s/>])[^>]*(?<!/)>(.*?)</(?:\w+:)?$name\s*>""",
        Pattern.CASE_INSENSITIVE or Pattern.DOTALL
    )
    val matcher = element.matcher(inXml)
    return if (matcher.find()) matcher.group(1) else null
}
|
|
|
|
/**
 * Returns the raw inner content of every `<tag>…</tag>` element in [inXml],
 * in document order.
 *
 * The contents are returned untouched: no entity decoding and no trimming.
 * Matching is case-insensitive and tolerates an optional namespace prefix on
 * both the opening and closing tag.
 *
 * @param tag element name, treated as literal text (not as a regex).
 * @param inXml XML text to search.
 * @return one entry per matched element; empty when nothing matches.
 */
fun xmlAllBlocks(tag: String, inXml: String): List<String> {
    val name = Pattern.quote(tag)
    // (?=[\s/>]) ends the name exactly, so "content" does not match <content:encoded>.
    // (?<!/)> rejects self-closing tags, which have no content or closing tag to pair with.
    val element = Pattern.compile(
        """<(?:\w+:)?$name(?=[\s/>])[^>]*(?<!/)>(.*?)</(?:\w+:)?$name\s*>""",
        Pattern.CASE_INSENSITIVE or Pattern.DOTALL
    )
    val matcher = element.matcher(inXml)
    return generateSequence { if (matcher.find()) matcher.group(1) else null }.toList()
}
|
|
|
|
/**
 * Collects the attributes of every `<tag …>` or `<tag …/>` start tag in [inXml],
 * in document order.
 *
 * Matching is case-insensitive and tolerates an optional namespace prefix.
 * Each matched start tag contributes one map, produced by `parseXmlAttributes`
 * from the text between the element name and the closing `>`.
 *
 * @param tag element name, treated as literal text (not as a regex).
 * @param inXml XML text to search.
 * @return one attribute map per matched start tag; empty when nothing matches.
 */
fun xmlAllTagAttributes(tag: String, inXml: String): List<Map<String, String>> {
    val name = Pattern.quote(tag)
    // (?=[\s/>]) ends the name exactly, so "content" does not match <content:encoded>
    // and contribute a spurious empty attribute map.
    val startTag = Pattern.compile(
        """<(?:\w+:)?$name(?=[\s/>])([^>]*?)/?>""",
        Pattern.CASE_INSENSITIVE or Pattern.DOTALL
    )
    val matcher = startTag.matcher(inXml)
    return generateSequence { if (matcher.find()) matcher.group(1) else null }
        .map { parseXmlAttributes(it) }
        .toList()
}
|
|
|
|
/**
 * One `name="value"` or `name='value'` pair. The name may carry a namespace
 * prefix and may contain `-` or `.` in addition to word characters.
 */
private val xmlAttributePattern: Pattern =
    Pattern.compile("""([\w.-]+(?::[\w.-]+)?)\s*=\s*(?:"([^"]*)"|'([^']*)')""")

/**
 * Parses the attribute portion of a start tag into a name-to-value map.
 *
 * Names are lower-cased; values are passed through [xmlDecoded] and
 * [xmlTrimmed]. Both double-quoted and single-quoted values are accepted.
 * When a name occurs more than once, the last occurrence wins.
 *
 * @param raw text between the element name and the closing `>` of a tag.
 * @return the parsed attributes; empty when [raw] contains none.
 */
private fun parseXmlAttributes(raw: String): Map<String, String> {
    val matcher = xmlAttributePattern.matcher(raw)
    val attributes = mutableMapOf<String, String>()
    while (matcher.find()) {
        val key = matcher.group(1)?.lowercase() ?: continue
        // Group 2 holds a double-quoted value, group 3 a single-quoted one.
        val rawValue = matcher.group(2) ?: matcher.group(3) ?: continue
        attributes[key] = rawValue.xmlDecoded().xmlTrimmed()
    }
    return attributes
}
|
|
|
|
/** Returns a newly generated random UUID in its canonical string form. */
fun generateUuid(): String {
    val id = UUID.randomUUID()
    return id.toString()
}
|