WXR file managed by this migration
   */
  public function __construct(array $arguments) {
    parent::__construct($arguments);
    $filename = $this->arguments['filename'];
    $this->wxrFile = $filename;
    $this->blog = wordpress_migrate_blog($filename);
    // Default the 'namespaces' argument to an empty array when it is missing
    // or empty.
    if (empty($this->arguments['namespaces'])) {
      $this->arguments['namespaces'] = array();
    }
  }

  /**
   * Called after completion of each migration.
   */
  protected function postImport() {
    parent::postImport();
    // Clear ignored rows from the map, so as not to confuse reporting.
    $map_table = $this->map->getMapTable();
    foreach ($this->skippedItems as $postID) {
      db_delete($map_table)
        ->condition('needs_update', MigrateMap::STATUS_IGNORED)
        ->condition('sourceid1', $postID)
        ->execute();
    }
    $this->skippedItems = array();
  }

  /**
   * Outputs a progress message, reflecting the current status of a migration
   * process.
   *
   * Nothing is displayed for a result code that has no message defined below.
   *
   * @param int $result
   *   Status of the process, represented by one of the Migration::RESULT_*
   *   constants.
   */
  protected function progressMessage($result) {
    $time = microtime(TRUE) - $this->lastfeedback;
    if ($time > 0) {
      $perminute = round(60 * $this->processed_since_feedback / $time);
      $time = round($time, 1);
    }
    else {
      $perminute = '?';
    }

    // Stay NULL when $result is not one of the handled result codes.
    $basetext = NULL;
    $type = NULL;

    if ($this->status == Migration::STATUS_IMPORTING) {
      switch ($result) {
        case Migration::RESULT_COMPLETED:
          $basetext = "Imported !numitems in !time sec (!perminute/min) - done with '!name'";
          $type = 'completed';
          break;

        case Migration::RESULT_FAILED:
          $basetext = "Imported !numitems in !time sec (!perminute/min) - failure with '!name'";
          $type = 'failed';
          break;

        case Migration::RESULT_INCOMPLETE:
          $basetext = "Imported !numitems in !time sec (!perminute/min) - continuing with '!name'";
          $type = 'ok';
          break;

        case Migration::RESULT_STOPPED:
          $basetext = "Imported !numitems in !time sec (!perminute/min) - stopped '!name'";
          $type = 'warning';
          break;
      }
      $numitems = $this->destination->getCreated();
    }
    else {
      switch ($result) {
        case Migration::RESULT_COMPLETED:
          $basetext = "Rolled back !numitems in !time sec (!perminute/min) - done with '!name'";
          $type = 'completed';
          break;

        case Migration::RESULT_FAILED:
          $basetext = "Rolled back !numitems in !time sec (!perminute/min) - failure with '!name'";
          $type = 'failed';
          break;

        case Migration::RESULT_INCOMPLETE:
          $basetext = "Rolled back !numitems in !time sec (!perminute/min) - continuing with '!name'";
          $type = 'ok';
          break;

        case Migration::RESULT_STOPPED:
          $basetext = "Rolled back !numitems in !time sec (!perminute/min) - stopped '!name'";
          $type = 'warning';
          break;
      }
      $numitems = $this->processed_since_feedback + $this->source->getIgnored();
    }

    if ($basetext !== NULL) {
      $message = t($basetext, array(
        '!numitems' => $numitems,
        '!time' => $time,
        '!perminute' => $perminute,
        '!name' => $this->machineName,
      ));
      self::displayMessage($message, $type);
    }

    if ($result == Migration::RESULT_INCOMPLETE) {
      // Use the same clock as the elapsed-time computation above; a
      // whole-second timestamp would inflate the next interval by up to 1s.
      $this->lastfeedback = microtime(TRUE);
      $this->processed_since_feedback = $this->successes_since_feedback = 0;
      $this->source->resetStats();
      $this->destination->resetStats();
    }
  }

  /**
   * Work-around for http://drupal.org/node/936222 - make sure our node_save()
   * calls not governed by the node destination class do not overwrite aliases.
   *
   * @param $node
   *   Node object whose path settings are adjusted in place: pathauto is
   *   switched off and an empty alias is set when none is present.
   */
  protected function disablePathauto($node) {
    $node->path['pathauto'] = 0;
    if (!isset($node->path['alias'])) {
      $node->path['alias'] = '';
    }
  }
}

/**
 * Represents one WordPress blog, as described by a WXR export file, together
 * with its row in the wordpress_migrate table.
 */
class WordPressBlog {
  protected $blogID;
  public function getBlogID() {
    return $this->blogID;
  }

  protected $filename;
  public function getFilename() {
    return $this->filename;
  }

  protected $wxrVersion = '1.0';
  public function getWxrVersion() {
    return $this->wxrVersion;
  }

  protected $title;
  public function getTitle() {
    return $this->title;
  }

  protected $displayTitle;
  public function getDisplayTitle() {
    return $this->displayTitle;
  }

  protected $blog_url;
  public function getBlogUrl() {
    return $this->blog_url;
  }

  protected $link;
  public function getLink() {
    return $this->link;
  }

  protected $uid;
  public function getUid() {
    return $this->uid;
  }

  protected $arguments = array();
  protected $migrations = array();

  /**
   * Reads the blog's identifying information from the WXR file and records
   * the blog in the wordpress_migrate table.
   *
   * @param $filename
   *   Filespec of the WXR file.
   * @param array $arguments
   *   Arguments stored on the blog object.
   *
   * @throws Exception
   *   When the file cannot be opened, or contains neither a
   *   wp:base_blog_url nor a link element.
   */
  public function __construct($filename, $arguments = array()) {
    $this->filename = $filename;
    $this->arguments = $arguments;

    // Make sure the upload directory is properly protected
    file_create_htaccess('wordpress://', TRUE);

    // Suppress errors during parsing, so we can pick them up after
    libxml_use_internal_errors(TRUE);

    // Get the blog_url, which is our unique determiner of which blog we're
    // talking about
    $title = '';
    $reader = new XMLReader;
    if (!$reader->open($this->filename)) {
      throw new Exception(t('Could not open XML file !url',
        array('!url' => $this->filename)));
    }

    $this->blog_url = '';
    while ($reader->read()) {
      if ($reader->nodeType == XMLReader::ELEMENT) {
        switch ($reader->name) {
          case 'title':
            $title = WordPressBlog::readString($reader);
            $this->displayTitle = $title;
            break;

          case 'wp:wxr_version':
            $this->wxrVersion = WordPressBlog::readString($reader);
            break;

          case 'wp:base_blog_url':
            $this->blog_url = WordPressBlog::readString($reader);
            break;

          case 'link':
            // Catch only the first link. Later link elements must not
            // overwrite it, because the link doubles as the blog key when
            // there is no wp:base_blog_url.
            if (empty($this->link)) {
              $this->link = WordPressBlog::readString($reader);
            }
            break;
        }
      }
      // Stop scanning as soon as everything we need has been found.
      if (!empty($title) && !empty($this->blog_url) && !empty($this->link)) {
        break;
      }
    }
    $reader->close();

    // Validate that it really is a WXR file
    if (empty($this->blog_url)) {
      // Older WP versions did not have a blog_url but used link instead.
      if (!empty($this->link)) {
        $this->blog_url = $this->link;
      }
      else {
        throw new Exception(t('The uploaded file is not a valid WordPress export'));
      }
    }

    // Keep only alphabetic characters
    $this->title = preg_replace('/[^A-Za-z]/', '', $title);
    if (!$this->title) {
      $this->title = preg_replace('/[^A-Za-z]/', '', $this->blog_url);
    }

    global $user;
    $this->uid = $user->uid;

    // Insert or update the row keyed by blog_url, then read back its ID.
    db_merge('wordpress_migrate')
      ->key(array('blog_url' => $this->blog_url))
      ->fields(array(
        'title' => $this->title,
        'uid' => $this->uid,
        'link' => $this->link,
        'filename' => $this->filename,
        'wxr_version' => $this->wxrVersion,
      ))
      ->execute();
    $this->blogID = db_select('wordpress_migrate', 'wm')
      ->fields('wm', array('blog_id'))
      ->condition('blog_url', $this->blog_url)
      ->execute()
      ->fetchField();
  }

  /**
   * Builds the machine name of one of this blog's migrations.
   *
   * @param $class_name
   *   Either a key or a value of migrationClasses().
   *
   * @return string
   *   The blog title followed by the class name with its leading 'WordPress'
   *   removed.
   */
  public function machineName($class_name) {
    // If the default classes have been overridden, $class_name might be either
    // the default class name, or the name of the overridden class. Check first
    // for the former case, then the latter
    $classes = $this->migrationClasses();
    if (!isset($classes[$class_name])) {
      $flipped = array_flip($classes);
      // Only remap names we actually know about; an unknown name is used
      // as given instead of triggering an undefined index.
      if (isset($flipped[$class_name])) {
        $class_name = $flipped[$class_name];
      }
    }
    return $this->title . substr($class_name, strlen('WordPress'));
  }

  /**
   * The implemented WordPress migrations, in the order they should be run.
   *
   * @return array
   *   Class names to use, keyed by default class name.
   */
  public function migrationClasses() {
    return array(
      'WordPressAuthor' => 'WordPressAuthor',
      'WordPressCategory' => 'WordPressCategory',
      'WordPressTag' => 'WordPressTag',
      'WordPressBlogEntry' => 'WordPressBlogEntry',
      'WordPressPage' => 'WordPressPage',
      'WordPressAttachment' => 'WordPressAttachment',
      'WordPressComment' => 'WordPressComment',
    );
  }

  /**
   * Get a list of all migrations in this blog.
   *
   * The list is built on first use and kept on the object; classes whose
   * migration instance cannot be obtained are left out.
   *
   * @return Migration[]
   */
  public function migrations() {
    if (empty($this->migrations)) {
      $this->migrations = array();
      foreach ($this->migrationClasses() as $actual_class) {
        try {
          $this->migrations[$actual_class] =
            MigrationBase::getInstance($this->machineName($actual_class));
        }
        catch (Exception $e) {
          // Simply ignore non-existent migrations
        }
      }
    }
    return $this->migrations;
  }

  /**
   * Get a list of all WordPress blogs.
   *
   * @return WordPressBlog[]
   */
  static public function blogs() {
    $blogs = array();
    $result = db_select('wordpress_migrate', 'wm')
      ->fields('wm', array('filename'))
      ->execute();
    foreach ($result as $row) {
      $blogs[] = wordpress_migrate_blog($row->filename);
    }
    return $blogs;
  }

  /**
   * WXR files typically need some cleanup to be successfully parsed - perform
   * that here.
   *
   * @param $sourcefile
   *   The raw WXR file as uploaded.
   * @param $destination
   *   Filespec to which to write the cleaned-up WXR file. Omit when
   *   $namespaces_only == TRUE.
   * @param bool $unlink
   *   Indicates whether $sourcefile will be deleted after preprocessing.
   * @param bool $namespaces_only
   *   When TRUE, do not rewrite the file, simply gather and return the
   *   namespaces.
   *
   * @return array
   *   List of referenced namespaces, keyed by prefix.
   *
   * @throws Exception
   *   When the source or the destination file cannot be opened.
   */
  static public function preprocessFile($sourcefile, $destination, $unlink = TRUE, $namespaces_only = FALSE) {
    // Cleanup some stuff in the process of moving the file to its final
    // destination
    $source_handle = fopen($sourcefile, 'r');
    if ($source_handle === FALSE) {
      throw new Exception(t('Could not open XML file !url',
        array('!url' => $sourcefile)));
    }

    // First, get the header (everything up to and including the line holding
    // the <channel> element) to rewrite the namespaces (skipping any empty
    // lines).
    $header = '';
    while (($line = fgets($source_handle)) !== FALSE) {
      if (trim($line) !== '') {
        $header .= $line;
        if (strpos($line, '<channel>') !== FALSE) {
          break;
        }
      }
    }

    // Each namespace below is inserted just before the content namespace
    // declaration, and only when its prefix is not already declared, because
    // declaring the same attribute twice makes the XML unparseable.
    $content_ns = 'xmlns:content="http://purl.org/rss/1.0/modules/content/"';
    $extra_namespaces = array(
      // The excerpt namespace is sometimes omitted, stuff it in if necessary
      'excerpt' => 'xmlns:excerpt="http://wordpress.org/export/1.0/excerpt/"',
      // Add the Atom namespace, in case it's referenced
      'atom' => 'xmlns:atom="http://www.w3.org/2005/Atom"',
      // What the hell, throw in iTunes too
      'itunes' => 'xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd"',
    );
    foreach ($extra_namespaces as $prefix => $declaration) {
      if (strpos($header, 'xmlns:' . $prefix . '=') === FALSE) {
        $header = str_replace($content_ns, $declaration . "\n\t" . $content_ns, $header);
      }
    }

    preg_match_all('|xmlns:(.+?)="(.+?)"|i', $header, $matches, PREG_SET_ORDER);
    $namespaces = array();
    foreach ($matches as $match) {
      $namespaces[$match[1]] = $match[2];
    }

    if ($namespaces_only) {
      fclose($source_handle);
      return $namespaces;
    }

    $dest_handle = fopen($destination, 'w');
    if ($dest_handle === FALSE) {
      // Bail out before the source can be unlinked below.
      fclose($source_handle);
      throw new Exception(t('Could not open XML file !url',
        array('!url' => $destination)));
    }

    // Replace HTML entities with XML entities
    $header = strtr($header, self::$entityReplacements);
    fputs($dest_handle, $header);

    // Now, fix unencoded ampersands and bogus characters on a line-by-line
    // basis. Compare against FALSE so a line consisting of "0" does not end
    // the loop early.
    while (($line = fgets($source_handle)) !== FALSE) {
      // Handle unencoded ampersands
      $line = preg_replace('/&(?![\w\d#]+;)/', '&amp;', $line);

      // Remove control characters (the regex removes the newline, so tack it
      // back on)
      $cleaned = preg_replace('~\p{C}+~u', '', $line);
      if ($cleaned === NULL) {
        // The line is not valid UTF-8, so the Unicode-aware pattern failed.
        // Strip ASCII control characters byte-wise rather than lose the line.
        $cleaned = preg_replace('~[\x00-\x1F\x7F]+~', '', $line);
      }
      $line = $cleaned . "\n";

      // WordPress export doesn't properly format embedded CDATA sections - our
      // quick-and-dirty fix is to remove the terminator of the embedded section
      $line = preg_replace('|// \]\]|', '', $line);

      // Replace HTML entities with XML entities
      $line = strtr($line, self::$entityReplacements);
      fputs($dest_handle, $line);
    }

    fclose($dest_handle);
    fclose($source_handle);
    if ($unlink) {
      unlink($sourcefile);
    }
    return $namespaces;
  }

  /**
   * Translation table between HTML entities and XML entities; some WP blogs
   * use HTML entities in XML.
   *
   * Keys are named HTML entities; values are the equivalent numeric character
   * references. The entities predefined by XML itself (amp, lt, gt, quot,
   * apos) are not listed.
   *
   * @var array
   */
  static protected $entityReplacements = array(
    '&AElig;' => '&#198;', '&Aacute;' => '&#193;', '&Acirc;' => '&#194;',
    '&Agrave;' => '&#192;', '&Alpha;' => '&#913;', '&Aring;' => '&#197;',
    '&Atilde;' => '&#195;', '&Auml;' => '&#196;', '&Beta;' => '&#914;',
    '&Ccedil;' => '&#199;', '&Chi;' => '&#935;', '&Dagger;' => '&#8225;',
    '&Delta;' => '&#916;', '&ETH;' => '&#208;', '&Eacute;' => '&#201;',
    '&Ecirc;' => '&#202;', '&Egrave;' => '&#200;', '&Epsilon;' => '&#917;',
    '&Eta;' => '&#919;', '&Euml;' => '&#203;', '&Gamma;' => '&#915;',
    '&Iacute;' => '&#205;', '&Icirc;' => '&#206;', '&Igrave;' => '&#204;',
    '&Iota;' => '&#921;', '&Iuml;' => '&#207;', '&Kappa;' => '&#922;',
    '&Lambda;' => '&#923;', '&Mu;' => '&#924;', '&Ntilde;' => '&#209;',
    '&Nu;' => '&#925;', '&OElig;' => '&#338;', '&Oacute;' => '&#211;',
    '&Ocirc;' => '&#212;', '&Ograve;' => '&#210;', '&Omega;' => '&#937;',
    '&Omicron;' => '&#927;', '&Oslash;' => '&#216;', '&Otilde;' => '&#213;',
    '&Ouml;' => '&#214;', '&Phi;' => '&#934;', '&Pi;' => '&#928;',
    '&Prime;' => '&#8243;', '&Psi;' => '&#936;', '&Rho;' => '&#929;',
    '&Scaron;' => '&#352;', '&Sigma;' => '&#931;', '&THORN;' => '&#222;',
    '&Tau;' => '&#932;', '&Theta;' => '&#920;', '&Uacute;' => '&#218;',
    '&Ucirc;' => '&#219;', '&Ugrave;' => '&#217;', '&Upsilon;' => '&#933;',
    '&Uuml;' => '&#220;', '&Xi;' => '&#926;', '&Yacute;' => '&#221;',
    '&Yuml;' => '&#376;', '&Zeta;' => '&#918;', '&aacute;' => '&#225;',
    '&acirc;' => '&#226;', '&acute;' => '&#180;', '&aelig;' => '&#230;',
    '&agrave;' => '&#224;', '&alefsym;' => '&#8501;', '&alpha;' => '&#945;',
    '&and;' => '&#8743;', '&ang;' => '&#8736;', '&aring;' => '&#229;',
    '&asymp;' => '&#8776;', '&atilde;' => '&#227;', '&auml;' => '&#228;',
    '&bdquo;' => '&#8222;', '&beta;' => '&#946;', '&brvbar;' => '&#166;',
    '&bull;' => '&#8226;', '&cap;' => '&#8745;', '&ccedil;' => '&#231;',
    '&cedil;' => '&#184;', '&cent;' => '&#162;', '&chi;' => '&#967;',
    '&circ;' => '&#710;', '&clubs;' => '&#9827;', '&cong;' => '&#8773;',
    '&copy;' => '&#169;', '&crarr;' => '&#8629;', '&cup;' => '&#8746;',
    '&curren;' => '&#164;', '&dArr;' => '&#8659;', '&dagger;' => '&#8224;',
    '&darr;' => '&#8595;', '&deg;' => '&#176;', '&delta;' => '&#948;',
    '&diams;' => '&#9830;', '&divide;' => '&#247;', '&eacute;' => '&#233;',
    '&ecirc;' => '&#234;', '&egrave;' => '&#232;', '&empty;' => '&#8709;',
    '&emsp;' => '&#8195;', '&ensp;' => '&#8194;', '&epsilon;' => '&#949;',
    '&equiv;' => '&#8801;', '&eta;' => '&#951;', '&eth;' => '&#240;',
    '&euml;' => '&#235;', '&euro;' => '&#8364;', '&exist;' => '&#8707;',
    '&fnof;' => '&#402;', '&forall;' => '&#8704;', '&frac12;' => '&#189;',
    '&frac14;' => '&#188;', '&frac34;' => '&#190;', '&frasl;' => '&#8260;',
    '&gamma;' => '&#947;', '&ge;' => '&#8805;', '&hArr;' => '&#8660;',
    '&harr;' => '&#8596;', '&hearts;' => '&#9829;', '&hellip;' => '&#8230;',
    '&iacute;' => '&#237;', '&icirc;' => '&#238;', '&iexcl;' => '&#161;',
    '&igrave;' => '&#236;', '&image;' => '&#8465;', '&infin;' => '&#8734;',
    '&int;' => '&#8747;', '&iota;' => '&#953;', '&iquest;' => '&#191;',
    '&isin;' => '&#8712;', '&iuml;' => '&#239;', '&kappa;' => '&#954;',
    '&lArr;' => '&#8656;', '&lambda;' => '&#955;', '&lang;' => '&#9001;',
    '&laquo;' => '&#171;', '&larr;' => '&#8592;', '&lceil;' => '&#8968;',
    '&ldquo;' => '&#8220;', '&le;' => '&#8804;', '&lfloor;' => '&#8970;',
    '&lowast;' => '&#8727;', '&loz;' => '&#9674;', '&lrm;' => '&#8206;',
    '&lsaquo;' => '&#8249;', '&lsquo;' => '&#8216;', '&macr;' => '&#175;',
    '&mdash;' => '&#8212;', '&micro;' => '&#181;', '&middot;' => '&#183;',
    '&minus;' => '&#8722;', '&mu;' => '&#956;', '&nabla;' => '&#8711;',
    '&nbsp;' => '&#160;', '&ndash;' => '&#8211;', '&ne;' => '&#8800;',
    '&ni;' => '&#8715;', '&not;' => '&#172;', '&notin;' => '&#8713;',
    '&nsub;' => '&#8836;', '&ntilde;' => '&#241;', '&nu;' => '&#957;',
    '&oacute;' => '&#243;', '&ocirc;' => '&#244;', '&oelig;' => '&#339;',
    '&ograve;' => '&#242;', '&oline;' => '&#8254;', '&omega;' => '&#969;',
    '&omicron;' => '&#959;', '&oplus;' => '&#8853;', '&or;' => '&#8744;',
    '&ordf;' => '&#170;', '&ordm;' => '&#186;', '&oslash;' => '&#248;',
    '&otilde;' => '&#245;', '&otimes;' => '&#8855;', '&ouml;' => '&#246;',
    '&para;' => '&#182;', '&part;' => '&#8706;', '&permil;' => '&#8240;',
    '&perp;' => '&#8869;', '&phi;' => '&#966;', '&pi;' => '&#960;',
    '&piv;' => '&#982;', '&plusmn;' => '&#177;', '&pound;' => '&#163;',
    '&prime;' => '&#8242;', '&prod;' => '&#8719;', '&prop;' => '&#8733;',
    '&psi;' => '&#968;', '&rArr;' => '&#8658;', '&radic;' => '&#8730;',
    '&rang;' => '&#9002;', '&raquo;' => '&#187;', '&rarr;' => '&#8594;',
    '&rceil;' => '&#8969;', '&rdquo;' => '&#8221;', '&real;' => '&#8476;',
    '&reg;' => '&#174;', '&rfloor;' => '&#8971;', '&rho;' => '&#961;',
    '&rlm;' => '&#8207;', '&rsaquo;' => '&#8250;', '&rsquo;' => '&#8217;',
    '&sbquo;' => '&#8218;', '&scaron;' => '&#353;', '&sdot;' => '&#8901;',
    '&sect;' => '&#167;', '&shy;' => '&#173;', '&sigma;' => '&#963;',
    '&sigmaf;' => '&#962;', '&sim;' => '&#8764;', '&spades;' => '&#9824;',
    '&sub;' => '&#8834;', '&sube;' => '&#8838;', '&sum;' => '&#8721;',
    '&sup1;' => '&#185;', '&sup2;' => '&#178;', '&sup3;' => '&#179;',
    '&sup;' => '&#8835;', '&supe;' => '&#8839;', '&szlig;' => '&#223;',
    '&tau;' => '&#964;', '&there4;' => '&#8756;', '&theta;' => '&#952;',
    '&thetasym;' => '&#977;', '&thinsp;' => '&#8201;', '&thorn;' => '&#254;',
    '&tilde;' => '&#732;', '&times;' => '&#215;', '&trade;' => '&#8482;',
    '&uArr;' => '&#8657;', '&uacute;' => '&#250;', '&uarr;' => '&#8593;',
    '&ucirc;' => '&#251;', '&ugrave;' => '&#249;', '&uml;' => '&#168;',
    '&upsih;' => '&#978;', '&upsilon;' => '&#965;', '&uuml;' => '&#252;',
    '&weierp;' => '&#8472;', '&xi;' => '&#958;', '&yacute;' => '&#253;',
    '&yen;' => '&#165;', '&yuml;' => '&#255;', '&zeta;' => '&#950;',
    '&zwj;' => '&#8205;', '&zwnj;' => '&#8204;',
  );

  /**
   * With earlier versions of libxml, XMLReader has no readString() method -
   * mock it up if necessary.
   *
   * @param $reader
   *   XMLReader instance being iterated for XML parsing.
   *
   * @return string
   *   Text content of the current node; an empty string when the fallback
   *   cannot expand the node.
   */
  static public function readString(XMLReader $reader) {
    if (method_exists('XMLReader', 'readString')) {
      return $reader->readString();
    }
    else {
      // expand() returns FALSE on failure, so guard the property access.
      $node = $reader->expand();
      return $node ? $node->textContent : '';
    }
  }
}