Ruby Buzz Forum - Merging the feeds

 #! /usr/bin/ruby
 


 # RubyRiver modules
 require 'get_param'
 require 'make_xml'

 # Ruby standard library: XML parsing, and Time.parse for sorting by pubDate
 require 'rexml/document'
 require 'time'
 include REXML
 


 # Returns the text of the first element matching +path+ below +parent+,
 # or nil when the element is missing or has no text.
 def element_text(parent, path)
   node = parent.elements[path]
   node && node.text
 end

 # Builds the excerpt published for a single post.
 #
 # description - raw description text of the item (may be nil)
 # max         - minimum number of characters to keep before looking for a
 #               word boundary
 # link        - URL of the original post (may be nil)
 #
 # Returns the excerpt wrapped in a CDATA section.
 def build_excerpt(description, max, link)
   # Strip HTML tags from the post's text.
   # This is necessary, because only an excerpt will be published.
   text = description.to_s.gsub(/<[^>]+>/, "")

   # Don't split words: cut at the first space at or after +max+ characters,
   # or keep the whole text when there is no such space.
   cut = text.index(" ", max)
   excerpt = cut ? text[0, cut] : text

   # Add a link to the original post if there is more in the description.
   if excerpt.length < text.length
     excerpt += ' <a href="' + link.to_s + '">[more]</a>'
   end

   # Wrap the excerpt in a CDATA section to hide invalid XML. A literal "]]>"
   # inside the text would end the section early, so it is split across two
   # adjacent CDATA sections.
   "<![CDATA[" + excerpt.gsub("]]>", "]]]]><![CDATA[>") + "]]>"
 end

 # Read every setting once, up front; none of them changes during the run.
 config = "rubyriver.yml"
 cachedir = get_param(config, "cachedir")
 excerptlength = get_param(config, "excerptlength")
 maxposts = get_param(config, "maxposts")

 # Itemlist holds all the merged feed items as [Time, Hash] pairs (the shape
 # Hash#sort yields). An array is used rather than a hash keyed by time so
 # that two posts published at the same instant are both kept.
 itemlist = []

 # Parse each of the feed files from the local cache.
 Dir.foreach(cachedir) do |filename|
   # Only files whose name ends in ".xml" are feeds.
   next unless filename =~ /\.xml\z/

   # Read the feed into memory as an XML document.
   doc = REXML::Document.new(File.read(File.join(cachedir, filename)))

   # Extract the title and link for the entire feed.
   # This will be added to each item in the merged feed.
   feedtitle = element_text(doc, "rss/channel/title")
   feedlink = element_text(doc, "rss/channel/link")

   # Extract the details of each feed item and add it to the itemlist.
   doc.elements.each("rss/channel/item") do |item|
     # Each item merged must have a pubdate to allow sorting.
     pubdate = element_text(item, "pubDate").to_s
     next if pubdate.empty?

     # A pubdate that cannot be parsed cannot be sorted either, so the item
     # is skipped just like one without a pubdate.
     begin
       published = Time.parse(pubdate)
     rescue ArgumentError
       next
     end

     link = element_text(item, "link")
     description = element_text(item, "description")

     # The date/time in pubdate will be used to sort the items.
     itemlist << [published, { "pubdate" => pubdate,
       "feedtitle" => feedtitle,
       "feedlink" => feedlink,
       "title" => element_text(item, "title"),
       "link" => link,
       "description" => build_excerpt(description, excerptlength, link) }]
   end
 end

 # Get the first N items in reverse chronological order.
 itemlist = itemlist.sort_by { |pair| pair[0] }.reverse.slice(0, maxposts)

 # Create XML files with the resulting items.
 # The published feed will be standard RSS 2.0. This can only
 # include a title and link for the individual feed item.
 # The internal feed will include the item's overall feed title and feed link for
 # use on the RubyRiver page.
 make_xml(itemlist, get_param(config, "publishedfeed"))
 make_xml(itemlist, get_param(config, "internalfeed"))

	Web Artima.com
merge_feeds.rb