package com.rssuper.parsing

import java.text.ParsePosition
import java.text.SimpleDateFormat
import java.util.Locale
import java.util.TimeZone
import java.util.UUID
import java.util.regex.Pattern

/**
 * Best-effort parser for timestamps found in XML feeds.
 *
 * Each candidate pattern is tried in declaration order and the first one that
 * parses wins. All formatters use [Locale.US] and default to UTC when the input
 * carries no zone information (e.g. the date-only pattern).
 */
object XmlDateParser {
    /** Patterns in priority order: ISO-8601 with colon offsets first, then the fallbacks. */
    private val PATTERNS = listOf(
        "yyyy-MM-dd'T'HH:mm:ss.SSSXXX",
        "yyyy-MM-dd'T'HH:mm:ssXXX",
        "EEE, dd MMM yyyy HH:mm:ss Z",
        "EEE, dd MMM yyyy HH:mm Z",
        "yyyy-MM-dd'T'HH:mm:ssZ",
        "yyyy-MM-dd'T'HH:mm:ss.SSSZ",
        "yyyy-MM-dd HH:mm:ss Z",
        "yyyy-MM-dd",
    )

    private val formats: List<SimpleDateFormat> by lazy {
        PATTERNS.map { pattern ->
            SimpleDateFormat(pattern, Locale.US).apply { timeZone = TimeZone.getTimeZone("UTC") }
        }
    }

    /**
     * Parses [value] into a date.
     *
     * @param value raw text; surrounding whitespace/control characters are ignored.
     * @return the parsed date, or `null` when [value] is `null`, blank after trimming,
     *   or matches none of the known patterns.
     */
    // SimpleDateFormat instances are shared and not thread-safe, so access is serialized.
    @Synchronized
    fun parse(value: String?): java.util.Date? {
        val trimmed = value?.xmlTrimmed()?.takeIf { it.isNotEmpty() } ?: return null
        // parse(text, ParsePosition) reports failure as null instead of throwing,
        // which avoids using exceptions for control flow.
        return formats.firstNotNullOfOrNull { it.parse(trimmed, ParsePosition(0)) }
    }
}

/** Matches the opening and closing CDATA markers so they can be stripped. */
private val CDATA_MARKERS = Regex("<!\\[CDATA\\[|\\]\\]>", RegexOption.IGNORE_CASE)

/** Matches `name="value"` or `name='value'`; group 2 / group 3 hold the value. */
private val ATTRIBUTE_PATTERN: Pattern =
    Pattern.compile("(\\w+(?::\\w+)?)\\s*=\\s*(?:\"([^\"]*)\"|'([^']*)')")

/** Removes leading and trailing characters whose code is at or below the space character. */
fun String.xmlTrimmed(): String = trim { it <= ' ' }

/** Returns the trimmed receiver, or `null` if nothing remains after trimming. */
fun String.xmlNilIfEmpty(): String? = xmlTrimmed().ifEmpty { null }

/**
 * Strips CDATA markers and decodes the predefined XML entities plus the two
 * numeric apostrophe forms (`&#39;`, `&#x27;`).
 *
 * `&amp;` is decoded last so that escaped entities such as `&amp;quot;` come out
 * as the literal text `&quot;` rather than being decoded twice.
 */
fun String.xmlDecoded(): String =
    replace(CDATA_MARKERS, "")
        .replace("&lt;", "<")
        .replace("&gt;", ">")
        .replace("&quot;", "\"")
        .replace("&apos;", "'")
        .replace("&#39;", "'")
        .replace("&#x27;", "'")
        .replace("&amp;", "&")

/** Parses [value] as a [Long] after trimming; `null` if absent, blank, or not a number. */
fun xmlInt64(value: String?): Long? = value?.xmlTrimmed()?.toLongOrNull()

/** Parses [value] as an [Int] after trimming; `null` if absent, blank, or not a number. */
fun xmlInt(value: String?): Int? = value?.xmlTrimmed()?.toIntOrNull()

/**
 * Builds the pattern for `<tag ...>content</tag>` with an optional namespace prefix
 * on both the opening and closing tag. Group 1 is the (lazily matched) content.
 *
 * NOTE: [tag] is interpolated into the regex unescaped, so it must not contain
 * regex metacharacters unless that is intended.
 */
private fun tagContentPattern(tag: String): Pattern =
    Pattern.compile("""(?is)<(?:\w+:)?$tag\b[^>]*>(.*?)</(?:\w+:)?$tag\s*>""")

/** Lazily yields the raw inner text of every `<tag>` element found in [inXml], in document order. */
private fun tagBodies(tag: String, inXml: String): Sequence<String> {
    val matcher = tagContentPattern(tag).matcher(inXml)
    return generateSequence { if (matcher.find()) matcher.group(1) else null }
}

/**
 * Returns the decoded, trimmed content of the first `<tag>` element in [inXml],
 * or `null` when no such element exists. Matching is case-insensitive.
 */
fun xmlFirstTagValue(tag: String, inXml: String): String? =
    tagBodies(tag, inXml).firstOrNull()?.xmlDecoded()?.xmlTrimmed()

/**
 * Returns the decoded, trimmed content of every `<tag>` element in [inXml],
 * skipping elements whose content is empty after trimming.
 */
fun xmlAllTagValues(tag: String, inXml: String): List<String> =
    tagBodies(tag, inXml)
        .map { it.xmlDecoded().xmlTrimmed() }
        .filter { it.isNotEmpty() }
        .toList()

/** Returns the raw (undecoded, untrimmed) inner XML of the first `<tag>` element, or `null`. */
fun xmlFirstBlock(tag: String, inXml: String): String? = tagBodies(tag, inXml).firstOrNull()

/** Returns the raw (undecoded, untrimmed) inner XML of every `<tag>` element. */
fun xmlAllBlocks(tag: String, inXml: String): List<String> = tagBodies(tag, inXml).toList()

/**
 * Returns one attribute map per `<tag ...>` / `<tag ... />` occurrence in [inXml].
 * Attribute names are lowercased; values are decoded and trimmed.
 *
 * NOTE: [tag] is interpolated into the regex unescaped.
 */
fun xmlAllTagAttributes(tag: String, inXml: String): List<Map<String, String>> {
    val matcher = Pattern.compile("""(?is)<(?:\w+:)?$tag\b([^>]*)/?>""").matcher(inXml)
    return generateSequence { if (matcher.find()) matcher.group(1) else null }
        .map { parseXmlAttributes(it) }
        .toList()
}

/**
 * Extracts `name="value"` and `name='value'` pairs from the raw attribute section
 * of a tag. When a name repeats, the last occurrence wins.
 */
private fun parseXmlAttributes(raw: String): Map<String, String> {
    val matcher = ATTRIBUTE_PATTERN.matcher(raw)
    val result = mutableMapOf<String, String>()
    while (matcher.find()) {
        val key = matcher.group(1)?.lowercase() ?: continue
        // Exactly one of the two value groups participates, depending on the quote style.
        val rawValue = matcher.group(2) ?: matcher.group(3) ?: continue
        result[key] = rawValue.xmlDecoded().xmlTrimmed()
    }
    return result
}

/** Generates a random UUID in its canonical string form. */
fun generateUuid(): String = UUID.randomUUID().toString()