Groovy script to parse RSS with namespaces and download images



#!groovy
// Reads the Digg "popular" RSS feed, prints each item's title and category,
// and hands the item's thumbnail URL (when there is one) to download().
def feedAddress = "http://feeds.digg.com/digg/news/popular.rss"

// Prefixes used in the GPath lookups below, bound to their namespace URIs.
def namespaces = [
    digg : "http://digg.com/docs/diggrss/",
    media: "http://search.yahoo.com/mrss/"
]
def feed = new XmlSlurper().parse(feedAddress).declareNamespace(namespaces)

feed.channel.item.eachWithIndex { entry, index ->
    println "-------------------------------------------"
    println entry.title
    println entry."digg:category"

    def thumbnailUrl = entry."media:thumbnail".@url.text()
    // A falsy (empty) URL means there is nothing to fetch for this item;
    // returning from the closure moves on to the next item.
    if ( !thumbnailUrl ) {
        return
    }
    println thumbnailUrl
    download(index, thumbnailUrl)
}


/**
 * Downloads the resource at {@code address} into a file in the current
 * working directory.
 *
 * The local file name is the second-to-last "/"-separated segment of the
 * address, followed by "." and the text after the last "." of the final
 * segment (e.g. ".../abc123/t.png" is saved as "abc123.png").
 *
 * Both the remote input stream and the local output stream are closed when
 * the copy finishes or fails. The URL is opened before the destination file
 * is created, so a failed connection does not leave an empty file behind.
 *
 * @param num     index of the feed item; accepted for the caller's convenience
 *                but not used when saving
 * @param address URL of the image, as a string
 */
def download(num , address)
{
    def segments = address.tokenize("/")
    def ext = segments[-1].tokenize(".")[-1]
    def filename = "${segments[-2]}.${ext}".toString()
    println "saving image file : ${filename}"

    // withInputStream / withOutputStream close their streams even when the
    // copy throws, which the manual open/close sequence did not guarantee.
    new URL(address).withInputStream { input ->
        new File(filename).withOutputStream { output ->
            output << input
        }
    }
}
Advertisements

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Google+ photo

You are commenting using your Google+ account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s