CCIndexWarcExport: use an option group for the mutually exclusive options --query and --csv

sebastian-nagel · sebastian-nagel · commit dfddfb5da25d · 2020-03-03T11:59:16.000+01:00
diff --git a/src/main/java/org/commoncrawl/spark/examples/CCIndexExport.java b/src/main/java/org/commoncrawl/spark/examples/CCIndexExport.java
@@ -25,6 +25,7 @@
 import org.apache.commons.cli.CommandLineParser;
 import org.apache.commons.cli.HelpFormatter;
 import org.apache.commons.cli.Option;
+import org.apache.commons.cli.OptionGroup;
 import org.apache.commons.cli.Options;
 import org.apache.commons.cli.ParseException;
 import org.apache.commons.cli.PosixParser;
@@ -162,9 +163,11 @@ protected int parseOptions(String[] args, List<String> arguments) {
 
 	public void run(String[] args) throws IOException {
 
-		options.addOption(new Option("h", "help", false, "Show this message"))
-			.addOption(new Option("q", "query", true, "SQL query to select rows"))
-			.addOption(new Option("t", "table", true, "name of the table data is loaded into (default: ccindex)"));
+		options.addOption(new Option("h", "help", false, "Show this message"));
+		OptionGroup selectionSpec = new OptionGroup();
+		selectionSpec.addOption(new Option("q", "query", true, "SQL query to select rows"));
+		options.addOptionGroup(selectionSpec);
+		options.addOption(new Option("t", "table", true, "name of the table data is loaded into (default: ccindex)"));
 
 		addOptions();
 
@@ -174,7 +177,7 @@ public void run(String[] args) throws IOException {
 			System.exit(res);
 		}
 		if (arguments.size() < 2) {
-			System.err.println("Input and output path required!");
+			System.err.println("Both, <tablePath> and <outputPath> are required!");
 			help(options);
 			System.exit(1);
 		}
diff --git a/src/main/java/org/commoncrawl/spark/examples/CCIndexWarcExport.java b/src/main/java/org/commoncrawl/spark/examples/CCIndexWarcExport.java
@@ -24,6 +24,7 @@
 import org.apache.commons.cli.CommandLine;
 import org.apache.commons.cli.CommandLineParser;
 import org.apache.commons.cli.Option;
+import org.apache.commons.cli.OptionGroup;
 import org.apache.commons.cli.ParseException;
 import org.apache.commons.cli.PosixParser;
 import org.apache.hadoop.conf.Configuration;
@@ -56,12 +57,14 @@ public class CCIndexWarcExport extends CCIndexExport {
 
 	@Override
 	protected void addOptions() {
-		options.getOption("query")
-				.setDescription("SQL query to select rows. Note: the result is required to contain the columns `url', "
-						+ "`warc_filename', `warc_record_offset' and `warc_record_length', make sure they're SELECTed.");
-		options.addOption(new Option(null, "csv", true, "CSV file to load WARC records by filename, offset and length."
-				+ "The CSV file must have column headers and the input columns `url', `warc_filename', "
-				+ "`warc_record_offset' and `warc_record_length' are mandatory, see also option --query. "));
+		Option query = options.getOption("query");
+		query.setDescription("SQL query to select rows. Note: the result is required to contain the columns `url', "
+				+ "`warc_filename', `warc_record_offset' and `warc_record_length', make sure they're SELECTed.");
+		OptionGroup g = options.getOptionGroup(query).addOption(query).addOption(new Option(null, "csv", true,
+				"CSV file to load WARC records by filename, offset and length. "
+						+ "The CSV file must have column headers and the input columns `url', `warc_filename', "
+						+ "`warc_record_offset' and `warc_record_length' are mandatory, see also option --query. "));
+		options.addOptionGroup(g);
 
 		options.addOption(
 				new Option(null, "numOutputPartitions", true, "repartition data to have <n> output partitions"));