#!/usr/bin/ruby
# ----------------------------------------------------
# This software is provided AS-IS, with NO WARRANTY.
# Don't blame me if you have problems.
# ----------------------------------------------------
#
# Pebble-to-WordPress conversion script.
#
# Original Author: Vamsi Krishna
# Author of This Version: John Casey
#
# Based on:
# http://code.google.com/p/pebble2wordpress/
#
# For more information on running the original version, see:
# http://www.spritle.com/blogs/?p=107#more-107
#
# For more information running THIS script, see:
# http://www.johnofalltrades.name/?p=140
# ----------------------------------------------------
#
# Overview: every *.xml file found in the input directory is parsed as one
# blog entry. Its title, date, body, excerpt, categories and comments are
# written to the wp_posts, wp_term_relationships and wp_comments tables
# through ActiveRecord.

require 'rubygems'
require 'rexml/document'
require 'active_record'
require 'yaml'
require 'time'

# Directory (relative to the working directory) that holds the XML files.
basedir = "inputXML"

# Connection settings come from database.yml in the working directory.
# The block form of File.open closes the handle once the YAML is loaded.
dbconfig = File.open('database.yml') { |io| YAML.load(io) }
ActiveRecord::Base.establish_connection(dbconfig)

# NOTE(review): set_table_name and find(:all) below are the legacy
# ActiveRecord API - confirm the installed ActiveRecord version provides them.

# Row of the wp_terms table.
class Term < ActiveRecord::Base
  set_table_name "wp_terms"
end

# Row of the wp_term_relationships table.
class TermRelationship < ActiveRecord::Base
  set_table_name "wp_term_relationships"
end

# Row of the wp_posts table.
class Posts < ActiveRecord::Base
  set_table_name "wp_posts"
end

# Row of the wp_comments table.
class Comments < ActiveRecord::Base
  set_table_name "wp_comments"
end

# Parses the text found at +xpath+ under +element+ as a timestamp.
#
# The expected input looks like "17 Jul 2006 14:55:15:637 +0000". Everything
# from the ":637" millisecond suffix onwards (including the zone offset) is
# cut off before parsing.
#
# Returns a DateTime; falls back to DateTime.now when the node has no text
# or the text is blank.
# Raises ArgumentError (from DateTime.strptime) when the text does not match
# the "%d %b %Y %H:%M:%S" layout.
def time_of(element, xpath)
  txt = element.get_text(xpath)
  return DateTime.now unless txt

  value = txt.value.to_s.split(/:\d\d\d/)[0]
  # A blank text node would otherwise hand nil to strptime.
  return DateTime.now if value.nil? || value.strip.empty?

  DateTime.strptime(value, "%d %b %Y %H:%M:%S")
end

# Returns the text value found at +xpath+ under +element+, or an empty
# string when there is no such text.
def text_of(element, xpath)
  txt = element.get_text(xpath)
  (txt && txt.value) || ''
end

# Returns how many elements match +xpath+ relative to +element+.
def count_of(element, xpath)
  element.elements.to_a(xpath).size
end

# Turns +title+ into a slug: lower-cased runs of ASCII letters and digits
# joined by single dashes, with every other character dropped.
def path_part(title)
  title.downcase.split(/[^a-zA-Z0-9]+/).reject(&:empty?).join('-')
end

# Builds (without saving) the Posts record for the entry rooted at +root+.
def build_post(root, title, comment_count)
  date = time_of(root, 'date')

  post = Posts.new
  post.post_author = 1
  post.post_date = date
  post.post_date_gmt = date
  # Re-point image links at the upload directory.
  post.post_content = text_of(root, 'body').gsub('img src="images/', 'img src="/wp-content/uploads/')
  post.post_title = title
  post.post_excerpt = text_of(root, 'excerpt')
  post.post_status = "publish"
  post.comment_status = "open"
  post.ping_status = ""
  post.post_password = ""
  post.post_name = path_part(title)
  post.to_ping = ""
  post.pinged = ""
  post.post_modified = date
  post.post_modified_gmt = date
  post.post_content_filtered = ""
  post.post_parent = 0
  post.menu_order = 0
  post.post_type = "post"
  post.post_mime_type = ""
  post.comment_count = comment_count
  post
end

# Saves one TermRelationship per <category> child of +root+, linking the post
# +post_id+ to the term whose slug matches the category name.
#
# Empty <category/> elements and categories with no entry in +term_map+ are
# skipped rather than stored with a nil term id.
def attach_categories(root, post_id, term_map)
  root.each_element('category') do |category_element|
    text = category_element.text
    next unless text

    category = text.downcase
    category = category[1..-1] if category[0..0] == '/'
    category = path_part(category)

    term_id = term_map[category]
    unless term_id
      puts "...skipping unknown category '#{category}'"
      next
    end

    puts "...adding to category '#{category}' with term_id '#{term_id}'"
    # NOTE(review): term_map holds wp_terms.term_id values, but the column
    # written here is term_taxonomy_id - confirm the two coincide.
    rel = TermRelationship.new
    rel.object_id = post_id
    rel.term_taxonomy_id = term_id
    rel.term_order = 0
    rel.save
  end
end

# Saves one Comments record for every <comment> element in the document,
# attached to the post +post_id+.
def import_comments(root, post_id)
  root.each_element('//comment') do |element|
    # Read once so both date columns always agree.
    date = time_of(element, 'date')

    comment = Comments.new
    comment.comment_post_ID = post_id
    comment.comment_author = text_of(element, 'author')
    comment.comment_author_email = text_of(element, 'email')
    comment.comment_author_url = text_of(element, 'website')
    comment.comment_author_IP = text_of(element, 'ipAddress')
    comment.comment_date = date
    comment.comment_date_gmt = date
    comment.comment_content = text_of(element, 'body')
    comment.comment_karma = 0
    comment.comment_approved = text_of(element, 'state') == 'approved' ? 1 : 0
    comment.comment_agent = ""
    comment.comment_type = ""
    comment.comment_parent = 0
    comment.user_id = 0
    comment.save
  end
end

# slug => term_id for every existing term.
term_map = {}
Term.find(:all).each do |term|
  term_map[term.slug] = term.term_id
end

count = 0
skipped = []

Dir.chdir(basedir) do
  Dir.glob("*.xml").each do |file|
    unless File.file?(file)
      puts "Skipping #{file}"
      next
    end

    # Per-file error handling is disabled in the original script, so any
    # exception aborts the whole run:
    # begin
    document = File.open(file) { |io| REXML::Document.new(io) }
    root = document.root

    title = text_of(root, 'title')
    if title.empty?
      puts "Skipping post with empty title: #{file}"
      skipped << file
      next
    end

    puts "Adding: '#{title}'..."
    comment_count = count_of(root, '//comment')
    post = build_post(root, title, comment_count)
    post.save
    post.reload

    # The guid embeds the database id, so it needs a second save.
    id = post.ID
    post.guid = "/?p=#{id}"
    post.save

    attach_categories(root, id, term_map)

    if comment_count > 0
      puts "...adding #{comment_count} comments..."
      import_comments(root, id)
    end

    puts "...done."
    count += 1
    # rescue
    #   puts "Failed to parse #{file}. Error: " << $!
    # end
  end
end

puts "Processed #{count} xml files successfully!"
unless skipped.empty?
  # Double-quoted separator so "\n" is a real newline; one bullet per file.
  puts "\nSkipped the following due to processing errors:\n\n- #{skipped.join("\n- ")}"
end