value we're looking for in this migration
   * (post/page/attachment).
   *
   * @var string
   */
  protected $postType;

  /**
   * List of available source fields.
   *
   * @var array
   */
  protected $fields = array();

  /**
   * Simple initialization.
   *
   * @param $filename
   *   Handed to the parent constructor unchanged.
   * @param $post_type
   *   Post type this source is restricted to; stored in $this->postType.
   * @param $cache_key
   *   Used as the 'cache_key' source option.
   * @param array $namespaces
   *   Handed to the parent constructor unchanged.
   */
  public function __construct($filename, $post_type, $cache_key, $namespaces = array()) {
    $source_options = array(
      'reader_class' => 'MigrateXMLReader',
      'cache_counts' => TRUE,
      'cache_key' => $cache_key,
    );
    $this->fields = $this->fields();
    // Items live under /rss/channel/item and are keyed by wp:post_id.
    parent::__construct($filename, '/rss/channel/item', 'wp:post_id',
      $this->fields, $source_options, $namespaces);
    $this->postType = $post_type;
  }

  /**
   * Provides a list of available source fields, keyed by the field name
   * as it appears in the source data, with descriptions as the values.
   *
   * @return array
   */
  public function fields() {
    return array(
      'title' => 'Item title',
      'link' => 'WordPress URL of the item',
      'pubDate' => 'Published date',
      'dc:creator' => 'WordPress username of the item author',
      'guid' => 'Alternate URL of the item (?)',
      'description' => '?',
      'content:encoded' => 'Body of the item',
      'excerpt:encoded' => 'Teaser for the item',
      'wp:post_id' => 'Unique ID of the item within the blog',
      // The apostrophe must be escaped as \' - "\s" inside single quotes is a
      // literal backslash followed by "s".
      'wp:post_date' => 'Date posted (author\'s timezone?)',
      'wp:post_date_gmt' => 'Date posted (GMT)',
      'wp:comment_status' => 'Whether comments may be posted to this item (open/closed)',
      'wp:ping_status' => '?',
      'wp:post_name' => 'Trailing component of link',
      'wp:status' => 'Item status (publish/draft/inherit)',
      'wp:post_parent' => 'Parent item ID (?)',
      'wp:menu_order' => 'Equivalent to Drupal weight?',
      'wp:post_type' => 'Item type (post/page/attachment)',
      'wp:post_password' => '?',
      'wp:is_sticky' => 'Equivalent to Drupal sticky flag',
      'category' => 'Categories (as nicename) assigned to this item',
      'tag' => 'Tags (as nicename) assigned to this item',
      'content' => 'Extracted from Wordpress content:encoded',
      'status' => 'Extracted from Wordpress status',
    );
  }

  /**
   * Return a count of all available source records.
*
   * Only items whose wp:post_type equals $this->postType are counted.
   *
   * @return int
   */
  public function computeCount() {
    $count = 0;
    foreach ($this->sourceUrls as $url) {
      $reader = new $this->readerClass($url, $this->elementQuery, $this->idQuery);
      foreach ($reader as $element) {
        // Only count relevant postType. xpath() yields FALSE on error and an
        // empty array when the element is absent, so check before reading the
        // first match.
        $found = $element->xpath('wp:post_type');
        if (empty($found)) {
          continue;
        }
        if ((string) reset($found) == $this->postType) {
          $count++;
        }
      }
    }
    return $count;
  }
}

/**
 * Intermediate Migration class, implementing behavior common across different
 * types (post_type) of items.
 */
abstract class WordPressItemMigration extends WordPressMigration {
  /**
   * The value we're looking for in this migration
   * (post/page/attachment).
   *
   * @var string
   */
  protected $postType;

  /**
   * Returns the WordPress post type handled by this migration.
   *
   * @return string
   */
  public function getPostType() {
    return $this->postType;
  }

  /**
   * Indicates to the complete() method that the nid of this item needs to be
   * saved in linksToFix for later processing.
   *
   * @var boolean
   */
  protected $linksNeedFixing = FALSE;

  /**
   * List of nids which have links needing fixing.
   *
   * @var array
   */
  static protected $linksToFix = array();

  /**
   * Track the fields for the term references, so we can fix up if necessary
   * in prepare().
   *
   * @var string
   */
  protected $tagField = NULL;
  protected $categoryField = NULL;

  /**
   * Set it up: builds the map, source, destination and field mappings.
   *
   * @param array $arguments
   *   Migration arguments. Keys read here: post_type, bundle, namespaces,
   *   text_format, path_action, group_name, and optionally tag_field,
   *   tag_migration, category_field, category_migration, author_migration,
   *   generate_redirects, podcast_field, attachment_field.
   */
  public function __construct(array $arguments = array()) {
    parent::__construct($arguments);

    // WordPress post type
    $this->postType = $this->arguments['post_type'];
    // Drupal content type (bundle)
    $bundle = $this->arguments['bundle'];

    // Save tag/category fields (used in prepare()).
    if (isset($arguments['tag_field'])) {
      $this->tagField = $arguments['tag_field'];
    }
    if (isset($arguments['category_field'])) {
      $this->categoryField = $arguments['category_field'];
    }

    // post_id is the unique ID of items in WordPress
    $this->map = new MigrateSQLMap($this->machineName,
      array(
        'wp:post_id' => array(
          'type' => 'int',
          'not null' => TRUE,
          'unsigned' => TRUE,
          'description' => 'WordPress post ID',
        )
      ),
      MigrateDestinationNode::getKeySchema()
    );

    // Construct the source objects.
    $this->source = new WordPressItemSource($this->wxrFile, $this->postType,
      $this->machineName, $this->arguments['namespaces']);
    $this->destination = new MigrateDestinationNode($bundle);

    // Default mappings, applying to most or all migrations
    $this->addFieldMapping('title', 'title')
         ->xpath('title');
    // The zero-date fallback lives in prepareRow(), not prepare().
    $this->addFieldMapping('created', 'wp:post_date')
         ->xpath('wp:post_date')
         ->description('Empty dates handled in prepareRow()');
    $this->addFieldMapping('changed', 'wp:post_date')
         ->xpath('wp:post_date')
         ->description('Empty dates handled in prepareRow()');

    // If we have a separate author migration, use it here
    $uid_mapping = $this->addFieldMapping('uid', 'dc:creator')
                        ->xpath('dc:creator')
                        ->description('Use matching username if any, otherwise current user');
    if ($this->blog->getWxrVersion() != '1.0') {
      if (!empty($arguments['author_migration'])) {
        $author_migration = $arguments['author_migration'];
      }
      else {
        $author_migration = $this->group->getName() . 'Author';
      }
      $uid_mapping->sourceMigration($author_migration);
    }

    $this->addFieldMapping('body', 'content');
    $this->addFieldMapping('body:summary', 'excerpt:encoded')
         ->xpath('excerpt:encoded');
    $this->addFieldMapping('body:format')
         ->defaultValue($this->arguments['text_format']);

    if (module_exists('comment')) {
      $this->addFieldMapping('comment', 'wp:comment_status')
           ->xpath('wp:comment_status')
           ->description('WP "open" mapped to Drupal COMMENT_NODE_OPEN');
    }

    $this->addFieldMapping('status', 'status')
         ->description('Set Drupal status to 1 iff wp:status=publish');
    $this->addFieldMapping(NULL, 'wp:post_parent')
         ->xpath('wp:post_parent')
         ->description('Only applies to attachments');
    $this->addFieldMapping('sticky', 'wp:is_sticky')
         ->xpath('wp:is_sticky');

    if (module_exists('path')) {
      switch ($this->arguments['path_action']) {
        // Do not set path aliases
        case 0:
          $this->addFieldMapping('path');
          if (!module_exists('redirect')) {
            $this->addFieldMapping(NULL, 'link');
          }
          if (module_exists('pathauto')) {
            $this->addFieldMapping('pathauto')
                 ->defaultValue(0);
          }
          break;

        // Set path aliases to their original WordPress values
        case 1:
          $this->addFieldMapping('path', 'link');
          if (module_exists('pathauto')) {
            $this->addFieldMapping('pathauto')
                 ->defaultValue(0);
          }
          break;

        // Have pathauto generate new aliases
        case 2:
          $this->addFieldMapping('path');
          if (!module_exists('redirect')) {
            $this->addFieldMapping(NULL, 'link');
          }
          if (module_exists('pathauto')) {
            $this->addFieldMapping('pathauto')
                 ->defaultValue(1);
          }
          break;
      }
    }

    if (module_exists('redirect')) {
      // generate_redirects is optional; treat a missing key as "off" instead
      // of raising an undefined-index notice.
      if (!empty($this->arguments['generate_redirects'])) {
        $this->addFieldMapping('migrate_redirects', 'link');
      }
      else {
        $this->addFieldMapping('migrate_redirects');
        $this->addFieldMapping(NULL, 'link');
      }
    }

    if (module_exists('taxonomy')) {
      // Map the source fields to the configured vocabularies. Note the nicename
      // (WordPress machine name) is used for matching on sourceMigration - we
      // pull the actual tag/category separately in case we need to handle it
      // in prepare().
      if ($this->tagField) {
        $this->addFieldMapping($this->tagField, 'tag')
             ->sourceMigration($arguments['group_name'] . $arguments['tag_migration'])
             ->xpath('category[@domain="post_tag"]/@nicename');
        $this->addFieldMapping(NULL, 'tag_value')
             ->xpath('category[@domain="post_tag"]');
        $this->addFieldMapping($this->tagField . ':source_type')
             ->defaultValue('tid');
      }
      else {
        $this->addFieldMapping(NULL, 'tag');
      }
      if ($this->categoryField) {
        $this->addFieldMapping($this->categoryField, 'category')
             ->sourceMigration($arguments['group_name'] . $arguments['category_migration'])
             ->xpath('category[@domain="category"]/@nicename');
        $this->addFieldMapping(NULL, 'category_value')
             ->xpath('category[@domain="category"]');
        $this->addFieldMapping($this->categoryField . ':source_type')
             ->defaultValue('tid');
      }
      else {
        $this->addFieldMapping(NULL, 'category');
      }
    }

    // If podcast migration is requested, add the mapping. The argument is
    // optional, so a missing key simply means "no podcast field".
    $podcast_field = isset($this->arguments['podcast_field']) ? $this->arguments['podcast_field'] : NULL;
    if ($podcast_field) {
      $this->addFieldMapping($podcast_field, 'enclosure')
           ->callbacks(array($this, 'handleEnclosure'));
    }

    // If an attachment field is configured, document the mapping.
    $attachment_field = isset($this->arguments['attachment_field']) ? $this->arguments['attachment_field'] : NULL;
    if ($attachment_field) {
      $this->addFieldMapping($attachment_field)
           ->description('Attachment field populated later by attachment migration');
    }

    // Unmapped destination fields
    $this->addUnmigratedDestinations(array('is_new', 'revision', 'language',
      'promote', 'revision_uid', 'log', 'tnid', 'translate', 'body:language'));

    // Unmapped source fields
    $this->addUnmigratedSources(array('wp:post_id', 'wp:menu_order', 'wp:post_type'));
    $this->addFieldMapping(NULL, 'guid')
         ->description('same as link, plus isPermaLink attribute?')
         ->issueGroup(t('DNM'));
    $this->addFieldMapping(NULL, 'description')
         ->description('Always empty?')
         ->issueGroup(t('DNM'));
    $this->addFieldMapping(NULL, 'pubDate')
         ->description('Use post_date')
         ->issueGroup(t('DNM'));
    $this->addFieldMapping(NULL, 'wp:post_date_gmt')
         ->description('Use post_date')
         ->issueGroup(t('DNM'));
    $this->addFieldMapping(NULL, 'wp:ping_status')
         ->description('What does this mean?')
         ->issueGroup(t('Open issues'))
         ->issuePriority(MigrateFieldMapping::ISSUE_PRIORITY_MEDIUM);
    $this->addFieldMapping(NULL, 'wp:post_name')
         ->description('Looks like last component of path')
         ->issueGroup(t('DNM'));
    $this->addFieldMapping(NULL, 'wp:post_password')
         ->description('???')
         ->issueGroup(t('DNM'));
  }

  /**
   * Data manipulations to be performed before the migrate module applies mappings.
*
   * Skips rows of the wrong post type and rows in the trash, derives the
   * Drupal status, date, link and comment setting, then rewrites shortcodes
   * and local links in the body and stores the result in both $row->content
   * and $row->{'content:encoded'}.
   *
   * @param stdClass $row
   *   Source row; $row->xml is read and several properties are set on it.
   * @return bool
   *   FALSE to skip the row, TRUE to migrate it.
   */
  public function prepareRow($row) {
    // Namespaced views of the item: wp:* and content:* children.
    $wp_row = $row->xml->children($this->arguments['namespaces']['wp']);
    $content_row = $row->xml->children($this->arguments['namespaces']['content']);

    // Skip any of the wrong post type
    if ($wp_row->post_type != $this->postType) {
      $this->skippedItems[] = $row->{'wp:post_id'};
      return FALSE;
    }

    // Only publish those with wp:status == 'publish'; trashed items are
    // dropped entirely and every other status is imported unpublished.
    if (isset($wp_row->status)) {
      switch ($wp_row->status) {
        case 'publish':
          $row->status = NODE_PUBLISHED;
          break;
        case 'trash':
          return FALSE;
        default:
          $row->status = NODE_NOT_PUBLISHED;
      }
    }
    else {
      $row->status = NODE_NOT_PUBLISHED;
    }

    // If incoming date is zero (indicates unpublished content), use the current time
    if ($wp_row->post_date == '0000-00-00 00:00:00') {
      $row->{'wp:post_date'} = time();
    }

    // If the link has a query string, don't produce a path
    $row->link = (string) $row->xml->link;
    // NOTE(review): strpos() is used for truthiness, so a '?' at offset 0
    // would not be detected.
    if (strpos($row->link, '?')) {
      unset($row->link);
    }
    else {
      // Otherwise, strip the domain portion of the URL
      $matches = array();
      if (preg_match('|https?://[^/]+/(.*)|', $row->link, $matches)) {
        $row->link = $matches[1];
        // Strip the last slash off of the URL (the Path module can't handle this)
        $row->link = rtrim($row->link, '/');
      }
      else {
        unset($row->link);
      }
    }

    // Translate WordPress comment_status to Drupal values
    if (module_exists('comment')) {
      if ($wp_row->comment_status == 'open') {
        $row->{'wp:comment_status'} = COMMENT_NODE_OPEN;
      }
      else {
        $row->{'wp:comment_status'} = COMMENT_NODE_CLOSED;
      }
    }

    // Pull out teasers out based on tags
    // NOTE(review): the delimiter literal is empty in this copy of the file,
    // and explode() does not accept an empty separator. Presumably the
    // WordPress "more" marker was stripped from the literal - confirm against
    // the upstream source.
    // NOTE(review): $row->content is read here before it is assigned below,
    // and the result goes to $row->excerpt, which no mapping visible in this
    // class reads - verify this is intended.
    $broken = explode('', $row->content);
    if (count($broken) == 2) {
      $row->excerpt = $broken[0];
    }

    $row->content = $content_row->encoded;

    // Interpret the [caption] tags
    $row->content = preg_replace_callback('|(\[caption.*?\])(.*?)(\[/caption\])|i',
      array($this, 'replaceCaptions'), $row->content);

    // Handle [youtube] tags - convert them to tags. If the media module is
    // installed, the next step will then convert them to media tags
    // NOTE(review): every piece of the replacement below is an empty string
    // in this copy, so both [youtube] forms are currently replaced with
    // nothing. The markup appears to have been lost - confirm.
    $replacement = '' . '' . '' . '' . '' . '';
    // One form is [youtube dQw4w9WgXcQ]
    $row->content = preg_replace('|\[youtube (.+?)\]|i', $replacement, $row->content);
    // Another form is [youtube=https://www.youtube.com/watch?v=dQw4w9WgXcQ]
    $row->content = preg_replace('|\[youtube=.+?=([a-z0-9_-]+)[^\]]*\]|i', $replacement, $row->content);

    // Rewrite embedded video references to media tags
    if (module_exists('media')) {
      // NOTE(review): the pattern looks truncated (tag names missing before
      // each "]*>") - confirm against the upstream source.
      $row->content = preg_replace_callback('|]*>.*?(]*>).*?|i',
        array($this, 'replaceEmbeds'), $row->content);
    }

    // Rewrite (or remember to rewrite) links of the form
    // http://example.wordpress.com/?p=19 to local links of the form /node/35
    $row->content = $this->fixLocalLinks($row->content);

    // Handle Embedit HTML embed. Only triggered when the row carries an HTML1
    // property; replaceHTMLEmbeds() then substitutes [HTML1]..[HTML9].
    if (isset($row->HTML1)) {
      $row->content = $this->replaceHTMLEmbeds($row, $row->content);
    }

    // Replace Kimilli Flash Embed tags with the output of replaceFlashEmbeds()
    $row->content = preg_replace_callback(
      '|\[kml_flashembed *(.*?)/\]|i',
      array($this, 'replaceFlashEmbeds'), $row->content);

    // Remove [gallery] shortcode tags.
    $row->content = preg_replace('|\[gallery (.+?)\]|i', '', $row->content);
    // Keep the processed body under the original source field name as well.
    $row->{'content:encoded'} = $row->content;

    return TRUE;
  }

  /**
   * Rewrite [caption] tags into HTML representing a caption.
   * [caption] itself ($matches[1]) will become an opening
 * wrapper tag (its name is missing from this copy of the file),
   * the content within the tag ($matches[2]) will be passed through unchanged,
   * and the closing [/caption] ($matches[3]) will become an element containing
   * the caption followed by a closing tag (names missing from this copy).
   *
   * Used as the preg_replace_callback() callback for [caption] shortcodes.
   *
   * @param array $matches
   *   [1] opening shortcode, [2] enclosed content, [3] closing shortcode.
   * @return string
   *   Replacement text for the whole shortcode.
   */
  protected function replaceCaptions(array $matches) {
    $caption_open = $matches[1];
    $content = $matches[2];
    $caption_close = $matches[3];

    // NOTE(review): none of the three preg_match() results below is checked,
    // so a shortcode lacking width/align/caption reads an undefined
    // $matches[1].
    // Width of the wrapper: the declared width plus 10.
    preg_match('|width="(.*?)"|i', $caption_open, $matches);
    $width = (int)$matches[1] + 10;
    $style = "width: {$width}px;";

    // Translate the WordPress alignment class into inline CSS.
    preg_match('|align="(.*?)"|i', $caption_open, $matches);
    $align = $matches[1];
    switch ($align) {
      case 'aligncenter':
        $style .= "display:block;margin:0 auto;";
        break;
      case 'alignleft':
        $style .= "float:left;";
        break;
      case 'alignright':
        $style .= "float:right;";
        break;
      default:
        break;
    }

    preg_match('|caption="(.*?)"|i', $caption_open, $matches);
    $caption = $matches[1];

    // NOTE(review): the two literals below contain only line breaks in this
    // copy, and $style is never used - the wrapping markup appears to have
    // been stripped. Confirm against the upstream source.
    $result = '
';
    $result .= $content;
    $result .= "

$caption

";
    return $result;
  }

  /**
   * If we have a YouTube or other media reference, replace it with media tags.
   *
   * Used as a preg_replace_callback() callback. Falls back to the original
   * text whenever the source URL, width or height cannot be extracted, the
   * media module cannot resolve the URL, or file_uri_to_object() is missing.
   *
   * @param array $matches
   *   [0] the full matched text, [1] the inner tag that is parsed for its
   *   src, width and height attributes.
   * @return string
   *   Either $matches[0] unchanged or a '[[...]]' JSON media token.
   */
  protected function replaceEmbeds(array $matches) {
    // Default to the original tag.
    $result = $matches[0];

    // If an inner tag was captured, attempt to parse it.
    if ($matches[1]) {
      if (preg_match('|src=[\'"](.*?)[\'"]|i', $matches[1], $src_matches)) {
        $src = $src_matches[1];
      }
      else {
        return $result;
      }

      // Attempt to parse embedded media automatically through the media module.
      try {
        $uri = media_parse_to_uri($src);
        if ($uri) {
          // Sometimes, at least with oembed, it doesn't like the /v/ form
          $uri = str_replace('youtube.com/v/', 'youtube.com/watch?v=', $uri);
        }
      }
      catch (Exception $e) {
        // Any failure here leaves the original markup in place.
        return $result;
      }
      if (empty($uri)) {
        return $result;
      }

      // Extract the width & height for the media tag.
      if (preg_match('|width=[\'"](.*?)[\'"]|i', $matches[1], $width_matches)) {
        $width = $width_matches[1];
      }
      else {
        return $result;
      }
      if (preg_match('|height=[\'"](.*?)[\'"]|i', $matches[1], $height_matches)) {
        $height = $height_matches[1];
      }
      else {
        return $result;
      }

      // Build a file object suitable for saving.
      if (function_exists('file_uri_to_object')) {
        $file = file_uri_to_object($uri, TRUE);
        if (!isset($file->fid)) {
          // Save the media.
          file_save($file);
        }
      }
      else {
        // We shouldn't get here. But just in case...
        return $result;
      }

      // Build the media tag
      $video_info = array(
        'type' => 'media',
        'view_mode' => 'media_large',
        'fid' => $file->fid,
        'attributes' => array(
          'class' => 'media-image',
          'typeof' => 'foaf:Image',
          'height' => $height,
          'width' => $width,
          'style' => '',
        ),
      );
      $result = '[[' . drupal_json_encode($video_info) . ']]';
    }

    return $result;
  }

  /**
   * Replace a kml_flashembed tag with an HTML tag.
* [kml_flashembed movie="http://example.com/video/soundslider.swf" FVARS="size=0" height="368" width="420" /]
   *
   * Used as the preg_replace_callback() callback for kml_flashembed
   * shortcodes. Attributes other than movie and fvars are collected into
   * $attribute_string as name="value" pairs.
   *
   * @param array $matches
   *   [0] is the complete shortcode, which is scanned for name="value" pairs.
   * @return string
   */
  protected function replaceFlashEmbeds(array $matches) {
    $attribute_matches = array();
    $attribute_string = '';
    // The loop below reads the 'name' and 'value' keys, so the two groups must
    // be named accordingly.
    preg_match_all('|(?P<name>\w+)="(?P<value>.*?)"|', $matches[0], $attribute_matches);
    foreach ($attribute_matches['name'] as $delta => $name) {
      $value = $attribute_matches['value'][$delta];
      switch (drupal_strtolower($name)) {
        case 'movie':
          // Sometimes they've got their own player in there ahead of the movie,
          // strip it out
          $url_position = strpos($value, 'http://');
          if ($url_position) {
            $movie = substr($value, $url_position);
            // Strip parameters - but only when there are any. Passing a FALSE
            // strpos() result as the length would reduce the URL to an empty
            // string.
            $param_position = strpos($movie, '&');
            if ($param_position !== FALSE) {
              $movie = substr($movie, 0, $param_position);
            }
          }
          else {
            $movie = $value;
          }
          break;
        case 'fvars':
          $flashvars = $value;
          break;
        default:
          $attribute_string .= ' ' . $name . '="' . $value . '"';
          break;
      }
    }

    // NOTE(review): the markup that these statements concatenate has been
    // stripped from this copy of the file, leaving unbalanced quotes. The
    // statements are kept exactly as found; restore the literals from the
    // upstream source before relying on this method.
    $result = '\n";
    $result .= '\n";
    if (isset($flashvars)) {
      $result .= '\n";
    }
    $result .= "\n";
    $result .= '\n";
    if (isset($flashvars)) {
      $result .= '\n";
    }
    $result .= "\n";
    $result .= "\n";
    $result .= "\n";
    return $result;
  }

  /**
   * Replace any hrefs to links of the form http://example.wordpress.com/?p=23
   * to local links to a node.
   *
   * Resets $this->linksNeedFixing first; replaceLinks() sets it again when a
   * link cannot be resolved yet.
   *
   * @param string $body
   *   Text to scan.
   * @return string
   *   The text with resolvable links rewritten. If the regular expression
   *   fails, the text is returned unchanged.
   */
  protected function fixLocalLinks($body) {
    $this->linksNeedFixing = FALSE;

    // The site URL is data, not a pattern: escape regex metacharacters (the
    // dots in the host name in particular) and the '|' delimiter.
    $site_url = preg_quote($this->blog->getLink(), '|');
    $pattern = '|href="' . $site_url . '/\?p=([0-9]+)"|i';

    $fixed = preg_replace_callback($pattern, array($this, 'replaceLinks'), $body);
    // preg_replace_callback() returns NULL on error; never trade the whole
    // body for NULL.
    return ($fixed === NULL) ? $body : $fixed;
  }

  /**
   * If we have a local link of the form ?p=34, translate the WordPress ID into
   * a Drupal nid, and rewrite the link.
*
   * Used as the preg_replace_callback() callback of fixLocalLinks().
   *
   * @param array $matches
   *   [0] the complete href attribute, [1] the WordPress post ID.
   * @return string
   *   The rewritten href attribute, or $matches[0] if no nid is known yet.
   */
  protected function replaceLinks(array $matches) {
    // Default to the existing string
    $return = $matches[0];

    $wordpress_id = (int)$matches[1];
    $destination_id = NULL;

    // Check the blog entry and page maps to see if we can map this to a nid
    static $maps = array();
    if (empty($maps)) {
      $machines = array(
        $this->generateMachineName('WordPressBlogEntry'),
        $this->generateMachineName('WordPressPage'),
      );
      foreach ($machines as $machine) {
        // Skip a migration that cannot be instantiated rather than calling
        // getMap() on a non-object.
        $migration = MigrationBase::getInstance($machine);
        if ($migration) {
          $maps[] = $migration->getMap();
        }
      }
    }

    foreach ($maps as $map) {
      $destination_id = $map->lookupDestinationID(array($wordpress_id), $this);
      if (!empty($destination_id)) {
        // Got a hit! Stop looking...
        $destination_id = reset($destination_id);
        break;
      }
    }

    // Remember if we didn't get a hit, complete() will set up for later review
    if (empty($destination_id)) {
      $this->linksNeedFixing = TRUE;
    }
    else {
      $return = 'href="/node/' . $destination_id . '"';
    }
    return $return;
  }

  /**
   * Substitute [HTML1]..[HTML9] placeholders with the matching row values.
   *
   * @param stdClass $row
   *   Row whose HTML1..HTML9 properties, where set, supply the replacements.
   * @param string $text
   * @return string
   */
  protected function replaceHTMLEmbeds($row, $text) {
    for ($i = 1; $i <= 9; $i++) {
      $field = "HTML$i";
      if (isset($row->$field)) {
        $text = str_replace("[$field]", $row->$field, $text);
      }
    }
    return $text;
  }

  /**
   * Blubrry PowerPress podcast values are of the form
   * http://www.example.com/audio/example.mp3
   * 7662957
   * audio/mpeg
   * a:1:{s:8:"duration";s:8:"00:05:19";}
   * We will extract and return the first line, the URL of the audio file.
   *
   * @param $value
   * @return string
   */
  protected function handleEnclosure($value) {
    $value_array = explode("\n", $value);
    // trim() drops the "\r" left behind when the value uses CRLF line endings.
    return trim(reset($value_array));
  }

  /**
   * Prepare node - called just before node_save().
   *
   * @param stdClass $node
   * @param stdClass $row
   */
  public function prepare(stdClass $node, stdClass $row) {
    // With WXR version 1.0, match creator username to Drupal username if
    // possible; otherwise, use the user that initiated the import. With later
    // versions, we've already got the right uid via the author migration.
    if ($this->blog->getWxrVersion() == '1.0') {
      static $drupal_static_fast;
      if (!isset($drupal_static_fast)) {
        $drupal_static_fast['user_map'] = &drupal_static(__FUNCTION__);
        $drupal_static_fast['default_user'] = &drupal_static(__FUNCTION__ . 'DefaultUser');
      }
      $user_map = &$drupal_static_fast['user_map'];
      if (!isset($user_map[$row->creator])) {
        $user_map[$row->creator] = db_select('users', 'u')
                                   ->fields('u', array('uid'))
                                   ->condition('name', $row->creator)
                                   ->execute()
                                   ->fetchField();
        if (!$user_map[$row->creator]) {
          // No Drupal account by that name: fall back to the uid recorded in
          // the wordpress_migrate table for this file.
          $default_user = &$drupal_static_fast['default_user'];
          if (!isset($default_user)) {
            $default_user = db_select('wordpress_migrate', 'wpm')
                            ->fields('wpm', array('uid'))
                            ->condition('filename', $this->wxrFile)
                            ->execute()
                            ->fetchField();
          }
          $user_map[$row->creator] = $default_user;
        }
      }
      $node->uid = $user_map[$row->creator];
    }

    // If any term relationships were unresolved, create them the hard way
    foreach (array('tag', 'category') as $term_type) {
      $meta_field_name = $term_type . 'Field';
      if ($this->$meta_field_name) {
        $field_name = $this->$meta_field_name;
        $value_name = $term_type . '_value';
        // Shortcut - if the counts match, don't need to dig deeper
        $field_values = field_get_items('node', $node, $field_name);
        if (!empty($field_values)) {
          $node_count = count($field_values);
        }
        else {
          $node_count = 0;
        }
        if (isset($row->$term_type)) {
          $row_count = count($row->$term_type);
        }
        else {
          $row_count = 0;
        }
        if ($node_count != $row_count) {
          $field_info = field_info_field($field_name);
          $vocabulary_name = $field_info['settings']['allowed_values'][0]['vocabulary'];
          $vocabulary = taxonomy_vocabulary_machine_name_load($vocabulary_name);
          $vid = $vocabulary->vid;

          // Get any terms already in the field
          $done_terms = array();
          if (is_array($field_values)) {
            foreach ($field_values as $value_array) {
              $terms = taxonomy_term_load_multiple($value_array);
              foreach ($terms as $term) {
                $done_terms[] = $term->name;
              }
            }
          }

          if (isset($row->$value_name)) {
            if (!is_array($row->$value_name)) {
              $row->$value_name = array($row->$value_name);
            }
            $values = array();
            foreach ($row->$value_name as $value) {
              $values[] = html_entity_decode($value);
            }
            // Names present in the source but not yet on the node.
            $diff = array_diff($values, $done_terms);
            $field_language = field_language('node', $node, $field_name);
            foreach ($diff as $new_term_name) {
              // Let's see if the term already exists
              $matches = taxonomy_term_load_multiple(array(),
                array('name' => trim($new_term_name), 'vid' => $vid));
              if ($matches) {
                $node->{$field_name}[$field_language][] = array('tid' => key($matches));
              }
              else {
                $term = new stdClass;
                $term->name = $new_term_name;
                $term->vid = $vid;
                taxonomy_term_save($term);
                $node->{$field_name}[$field_language][] = array('tid' => $term->tid);
              }
            }
          }
        }
      }
    }

    // If we have an attached podcast, replace [powerpress] in the body with
    // a link to the audio file. Both the argument and the field value are
    // optional, so check before reading them.
    $podcast_field = isset($this->arguments['podcast_field']) ? $this->arguments['podcast_field'] : NULL;
    if ($podcast_field && !empty($node->{$podcast_field}[LANGUAGE_NONE][0]['fid'])) {
      $podcast = file_load($node->{$podcast_field}[LANGUAGE_NONE][0]['fid']);
      if ($podcast) {
        // file_create_url() gives us a full URL, which when running from
        // drush will often start with http://default/, which is not what we
        // want. Strip the prefix for a relative link.
        $url = file_create_url($podcast->uri);
        $url = str_replace($GLOBALS['base_url'] . '/', '/', $url);
        // NOTE(review): this literal contains only line breaks in this copy of
        // the file and $url is unused - the link markup appears to have been
        // stripped. Confirm against the upstream source.
        $link = '

';
        $body_language = field_language('node', $node, 'body');
        if (isset($node->body[$body_language][0]['value'])) {
          $node->body[$body_language][0]['value'] =
            str_replace('[powerpress]', $link, $node->body[$body_language][0]['value']);
        }
      }
    }
  }

  /**
   * Complete node - called just after node_save().
   *
   * @param stdClass $node
   * @param stdClass $row
   */
  public function complete(stdClass $node, stdClass $row) {
    // Remember the nid of any node where we weren't able to resolve ?p=23
    // links yet - by the time the page migration's postImport() is called, we
    // should have resolved all references.
    if ($this->linksNeedFixing) {
      self::$linksToFix[] = $node->nid;
    }
  }
}

/**
 * Implementation of WordPressMigration, for blog entries
 */
class WordPressBlogEntry extends WordPressItemMigration {
  /**
   * Uses the incoming 'post_type' argument as the bundle and fixes the
   * WordPress post type to 'post'.
   */
  public function __construct(array $arguments = array()) {
    $arguments['bundle'] = $arguments['post_type'];
    $arguments['post_type'] = 'post';
    parent::__construct($arguments);
  }
}

/**
 * Implementation of WordPressMigration, for pages
 */
class WordPressPage extends WordPressItemMigration {
  /**
   * Uses the incoming 'page_type' argument as the bundle and fixes the
   * WordPress post type to 'page'.
   */
  public function __construct(array $arguments = array()) {
    $arguments['bundle'] = $arguments['page_type'];
    $arguments['post_type'] = 'page';
    parent::__construct($arguments);
  }

  /**
   * Called after completion of the page migration. We review any nodes with links
   * that couldn't be resolved at migration time (presumably because they refer to
   * nodes not yet migrated) and see if we can resolve them now.
   */
  public function postImport() {
    parent::postImport();
    foreach (self::$linksToFix as $nid) {
      $node = node_load($nid);
      // The node may have been deleted since it was queued.
      if (!$node) {
        continue;
      }
      // Use the same language lookup as prepare() instead of assuming the
      // body is stored as language-neutral.
      $body_language = field_language('node', $node, 'body');
      if (!isset($node->body[$body_language][0]['value'])) {
        continue;
      }
      // Maintain the original update datestamp
      $changed = $node->changed;
      $node->body[$body_language][0]['value'] =
        $this->fixLocalLinks($node->body[$body_language][0]['value']);
      $this->disablePathauto($node);
      node_save($node);
      // node_save() bumps 'changed'; put the original value back.
      db_update('node')
        ->fields(array('changed' => $changed))
        ->condition('nid', $node->nid)
        ->execute();
    }
    // Everything queued has been visited; don't process it again if another
    // import runs in the same request.
    self::$linksToFix = array();
  }
}