From d54858bdc724728e287e324926f9664df34cdac9 Mon Sep 17 00:00:00 2001 From: Frank Maritato Date: Wed, 13 Apr 2011 17:05:43 -0700 Subject: [PATCH] added ability to write to hive partitions --- src/java/com/cloudera/sqoop/SqoopOptions.java | 20 +++- .../com/cloudera/sqoop/hive/TableDefWriter.java | 22 +++- .../com/cloudera/sqoop/tool/BaseSqoopTool.java | 20 +++- src/java/com/cloudera/sqoop/tool/VersionTool.java | 4 +- src/test/com/cloudera/sqoop/TestSqoopOptions.java | 10 ++ .../cloudera/sqoop/hive/TestTableDefWriter.java | 118 +++++++++++++------- 6 files changed, 144 insertions(+), 50 deletions(-) diff --git a/src/java/com/cloudera/sqoop/SqoopOptions.java b/src/java/com/cloudera/sqoop/SqoopOptions.java index 4327757..9a299d5 100644 --- a/src/java/com/cloudera/sqoop/SqoopOptions.java +++ b/src/java/com/cloudera/sqoop/SqoopOptions.java @@ -144,6 +144,8 @@ public class SqoopOptions implements Cloneable { @StoredAsProperty("hive.import") private boolean hiveImport; @StoredAsProperty("hive.overwrite.table") private boolean overwriteHiveTable; @StoredAsProperty("hive.table.name") private String hiveTableName; + @StoredAsProperty("hive.partition.key") private String hivePartitionKey; + @StoredAsProperty("hive.partition.value") private String hivePartitionValue; // An ordered list of column names denoting what order columns are // serialized to a PreparedStatement from a generated record type. @@ -1324,7 +1326,23 @@ public class SqoopOptions implements Cloneable { this.hiveTableName = name; } - /** + public String getHivePartitionKey() { + return hivePartitionKey; + } + + public void setHivePartitionKey(String hivePartitionKey) { + this.hivePartitionKey = hivePartitionKey; + } + + public String getHivePartitionValue() { + return hivePartitionValue; + } + + public void setHivePartitionValue(String hivePartitionValue) { + this.hivePartitionValue = hivePartitionValue; + } + + /** * @return the file size to split by when using --direct mode. 
*/ public long getDirectSplitSize() { diff --git a/src/java/com/cloudera/sqoop/hive/TableDefWriter.java b/src/java/com/cloudera/sqoop/hive/TableDefWriter.java index 6db2afc..39e9a9a 100644 --- a/src/java/com/cloudera/sqoop/hive/TableDefWriter.java +++ b/src/java/com/cloudera/sqoop/hive/TableDefWriter.java @@ -65,9 +65,12 @@ public class TableDefWriter { * @param withComments if true, then tables will be created with a * timestamp comment. */ - public TableDefWriter(final SqoopOptions opts, final ConnManager connMgr, - final String inputTable, final String outputTable, - final Configuration config, final boolean withComments) { + public TableDefWriter(final SqoopOptions opts, + final ConnManager connMgr, + final String inputTable, + final String outputTable, + final Configuration config, + final boolean withComments) { this.options = opts; this.connManager = connMgr; this.inputTableName = inputTable; @@ -167,6 +170,12 @@ public class TableDefWriter { sb.append("COMMENT 'Imported by sqoop on " + curDateStr + "' "); } + if (options.getHivePartitionKey() != null) { + sb.append("PARTITIONED BY (") + .append(options.getHivePartitionKey()) + .append(" STRING) "); + } + sb.append("ROW FORMAT DELIMITED FIELDS TERMINATED BY '"); sb.append(getHiveOctalCharCode((int) options.getOutputFieldDelim())); sb.append("' LINES TERMINATED BY '"); @@ -208,6 +217,13 @@ public class TableDefWriter { sb.append(outputTableName); sb.append('`'); + if (options.getHivePartitionKey() != null) { + sb.append(" PARTITION (") + .append(options.getHivePartitionKey()) + .append("='").append(options.getHivePartitionValue()) + .append("')"); + } + LOG.debug("Load statement: " + sb.toString()); return sb.toString(); } diff --git a/src/java/com/cloudera/sqoop/tool/BaseSqoopTool.java b/src/java/com/cloudera/sqoop/tool/BaseSqoopTool.java index 81d90a2..648e64a 100644 --- a/src/java/com/cloudera/sqoop/tool/BaseSqoopTool.java +++ b/src/java/com/cloudera/sqoop/tool/BaseSqoopTool.java @@ -89,6 +89,8 @@ public 
abstract class BaseSqoopTool extends SqoopTool { public static final String HIVE_IMPORT_ARG = "hive-import"; public static final String HIVE_TABLE_ARG = "hive-table"; public static final String HIVE_OVERWRITE_ARG = "hive-overwrite"; + public static final String HIVE_PARTITION_KEY_ARG = "hive-partition-key"; + public static final String HIVE_PARTITION_VALUE_ARG = "hive-partition-value"; public static final String NUM_MAPPERS_ARG = "num-mappers"; public static final String NUM_MAPPERS_SHORT_ARG = "m"; public static final String COMPRESS_ARG = "compress"; @@ -399,7 +401,16 @@ public abstract class BaseSqoopTool extends SqoopTool { .withDescription("Sets the table name to use when importing to hive") .withLongOpt(HIVE_TABLE_ARG) .create()); - + hiveOpts.addOption(OptionBuilder.withArgName("partition-key") + .hasArg() + .withDescription("Sets the partition key to use when importing to hive") + .withLongOpt(HIVE_PARTITION_KEY_ARG) + .create()); + hiveOpts.addOption(OptionBuilder.withArgName("partition-value") + .hasArg() + .withDescription("Sets the partition value to use when importing to hive") + .withLongOpt(HIVE_PARTITION_VALUE_ARG) + .create()); return hiveOpts; } @@ -653,6 +664,13 @@ public abstract class BaseSqoopTool extends SqoopTool { if (in.hasOption(HIVE_TABLE_ARG)) { out.setHiveTableName(in.getOptionValue(HIVE_TABLE_ARG)); } + + if (in.hasOption(HIVE_PARTITION_KEY_ARG)) { + out.setHivePartitionKey(in.getOptionValue(HIVE_PARTITION_KEY_ARG)); + } + if (in.hasOption(HIVE_PARTITION_VALUE_ARG)) { + out.setHivePartitionValue(in.getOptionValue(HIVE_PARTITION_VALUE_ARG)); + } } protected void applyOutputFormatOptions(CommandLine in, SqoopOptions out) diff --git a/src/java/com/cloudera/sqoop/tool/VersionTool.java b/src/java/com/cloudera/sqoop/tool/VersionTool.java index e8a257c..14260f0 100644 --- a/src/java/com/cloudera/sqoop/tool/VersionTool.java +++ b/src/java/com/cloudera/sqoop/tool/VersionTool.java @@ -19,7 +19,7 @@ package com.cloudera.sqoop.tool; import 
com.cloudera.sqoop.SqoopOptions; import com.cloudera.sqoop.SqoopVersion; import com.cloudera.sqoop.cli.ToolOptions; /** @@ -34,7 +34,7 @@ public class VersionTool extends BaseSqoopTool { @Override /** {@inheritDoc} */ public int run(SqoopOptions options) { System.out.print(new SqoopVersion().toString()); return 0; } diff --git a/src/test/com/cloudera/sqoop/TestSqoopOptions.java b/src/test/com/cloudera/sqoop/TestSqoopOptions.java index 0546094..e37815b 100644 --- a/src/test/com/cloudera/sqoop/TestSqoopOptions.java +++ b/src/test/com/cloudera/sqoop/TestSqoopOptions.java @@ -236,6 +236,16 @@ public class TestSqoopOptions extends TestCase { assertEquals(4, opts.getNumMappers()); } + public void testHivePartitionParams() throws Exception { + String[] args = { + "--hive-partition-key", "ds", + "--hive-partition-value", "20110413" + }; + SqoopOptions opts = parse(args); + assertEquals("ds", opts.getHivePartitionKey()); + assertEquals("20110413", opts.getHivePartitionValue()); + } + public void testPropertySerialization1() { // Test that if we write a SqoopOptions out to a Properties, // and then read it back in, we get all the same results. 
diff --git a/src/test/com/cloudera/sqoop/hive/TestTableDefWriter.java b/src/test/com/cloudera/sqoop/hive/TestTableDefWriter.java index f6614b5..4bf9996 100644 --- a/src/test/com/cloudera/sqoop/hive/TestTableDefWriter.java +++ b/src/test/com/cloudera/sqoop/hive/TestTableDefWriter.java @@ -21,6 +21,7 @@ package com.cloudera.sqoop.hive; import java.util.HashMap; import java.util.Map; +import com.cloudera.sqoop.tool.ImportTool; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; @@ -33,49 +34,80 @@ import junit.framework.TestCase; */ public class TestTableDefWriter extends TestCase { - public static final Log LOG = LogFactory.getLog( - TestTableDefWriter.class.getName()); + public static final Log LOG = LogFactory.getLog( + TestTableDefWriter.class.getName()); - // Test getHiveOctalCharCode and expect an IllegalArgumentException. - private void expectExceptionInCharCode(int charCode) { - try { - TableDefWriter.getHiveOctalCharCode(charCode); - fail("Expected IllegalArgumentException"); - } catch (IllegalArgumentException iae) { - // Expected; ok. + // Test getHiveOctalCharCode and expect an IllegalArgumentException. + private void expectExceptionInCharCode(int charCode) { + try { + TableDefWriter.getHiveOctalCharCode(charCode); + fail("Expected IllegalArgumentException"); + } + catch (IllegalArgumentException iae) { + // Expected; ok. 
+ } + } + + public void testHiveOctalCharCode() { + assertEquals("\\000", TableDefWriter.getHiveOctalCharCode(0)); + assertEquals("\\001", TableDefWriter.getHiveOctalCharCode(1)); + assertEquals("\\012", TableDefWriter.getHiveOctalCharCode((int) '\n')); + assertEquals("\\177", TableDefWriter.getHiveOctalCharCode(0177)); + + expectExceptionInCharCode(4096); + expectExceptionInCharCode(0200); + expectExceptionInCharCode(254); + } + + public void testDifferentTableNames() throws Exception { + Configuration conf = new Configuration(); + SqoopOptions options = new SqoopOptions(); + TableDefWriter writer = new TableDefWriter(options, null, + "inputTable", "outputTable", conf, false); + + Map colTypes = new HashMap(); + writer.setColumnTypes(colTypes); + + String createTable = writer.getCreateTableStmt(); + String loadData = writer.getLoadDataStmt(); + + LOG.debug("Create table stmt: " + createTable); + LOG.debug("Load data stmt: " + loadData); + + // Assert that the statements generated have the form we expect. 
+ assertTrue(createTable.indexOf( + "CREATE TABLE IF NOT EXISTS `outputTable`") != -1); + assertTrue(loadData.indexOf("INTO TABLE `outputTable`") != -1); + assertTrue(loadData.indexOf("/inputTable'") != -1); + } + + public void testPartitions() throws Exception { + String[] args = { + "--hive-partition-key", "ds", + "--hive-partition-value", "20110413" + }; + Configuration conf = new Configuration(); + SqoopOptions options = new ImportTool().parseArguments(args, null, null, false); + TableDefWriter writer = new TableDefWriter(options, + null, + "inputTable", + "outputTable", + conf, + false); + + Map colTypes = new HashMap(); + writer.setColumnTypes(colTypes); + + String createTable = writer.getCreateTableStmt(); + String loadData = writer.getLoadDataStmt(); + + assertNotNull(createTable); + assertNotNull(loadData); + + assertEquals("CREATE TABLE IF NOT EXISTS `outputTable` ( ) " + + "PARTITIONED BY (ds STRING) " + + "ROW FORMAT DELIMITED FIELDS TERMINATED BY '\\054' " + + "LINES TERMINATED BY '\\012' STORED AS TEXTFILE", createTable); + assertTrue(loadData.endsWith(" PARTITION (ds='20110413')")); } - } - - public void testHiveOctalCharCode() { - assertEquals("\\000", TableDefWriter.getHiveOctalCharCode(0)); - assertEquals("\\001", TableDefWriter.getHiveOctalCharCode(1)); - assertEquals("\\012", TableDefWriter.getHiveOctalCharCode((int) '\n')); - assertEquals("\\177", TableDefWriter.getHiveOctalCharCode(0177)); - - expectExceptionInCharCode(4096); - expectExceptionInCharCode(0200); - expectExceptionInCharCode(254); - } - - public void testDifferentTableNames() throws Exception { - Configuration conf = new Configuration(); - SqoopOptions options = new SqoopOptions(); - TableDefWriter writer = new TableDefWriter(options, null, - "inputTable", "outputTable", conf, false); - - Map colTypes = new HashMap(); - writer.setColumnTypes(colTypes); - - String createTable = writer.getCreateTableStmt(); - String loadData = writer.getLoadDataStmt(); - - LOG.debug("Create table 
stmt: " + createTable); - LOG.debug("Load data stmt: " + loadData); - - // Assert that the statements generated have the form we expect. - assertTrue(createTable.indexOf( - "CREATE TABLE IF NOT EXISTS `outputTable`") != -1); - assertTrue(loadData.indexOf("INTO TABLE `outputTable`") != -1); - assertTrue(loadData.indexOf("/inputTable'") != -1); - } } -- 1.7.0.4