Skip to content

Commit dfddfb5

Browse files
CCIndexWarcExport: use option group for the mutually exclusive options --query and --csv
1 parent 1ffabaf commit dfddfb5

2 files changed

Lines changed: 16 additions & 10 deletions

File tree

src/main/java/org/commoncrawl/spark/examples/CCIndexExport.java

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@
2525
import org.apache.commons.cli.CommandLineParser;
2626
import org.apache.commons.cli.HelpFormatter;
2727
import org.apache.commons.cli.Option;
28+
import org.apache.commons.cli.OptionGroup;
2829
import org.apache.commons.cli.Options;
2930
import org.apache.commons.cli.ParseException;
3031
import org.apache.commons.cli.PosixParser;
@@ -162,9 +163,11 @@ protected int parseOptions(String[] args, List<String> arguments) {
162163

163164
public void run(String[] args) throws IOException {
164165

165-
options.addOption(new Option("h", "help", false, "Show this message"))
166-
.addOption(new Option("q", "query", true, "SQL query to select rows"))
167-
.addOption(new Option("t", "table", true, "name of the table data is loaded into (default: ccindex)"));
166+
options.addOption(new Option("h", "help", false, "Show this message"));
167+
OptionGroup selectionSpec = new OptionGroup();
168+
selectionSpec.addOption(new Option("q", "query", true, "SQL query to select rows"));
169+
options.addOptionGroup(selectionSpec);
170+
options.addOption(new Option("t", "table", true, "name of the table data is loaded into (default: ccindex)"));
168171

169172
addOptions();
170173

@@ -174,7 +177,7 @@ public void run(String[] args) throws IOException {
174177
System.exit(res);
175178
}
176179
if (arguments.size() < 2) {
177-
System.err.println("Input and output path required!");
180+
System.err.println("Both, <tablePath> and <outputPath> are required!");
178181
help(options);
179182
System.exit(1);
180183
}

src/main/java/org/commoncrawl/spark/examples/CCIndexWarcExport.java

Lines changed: 9 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@
2424
import org.apache.commons.cli.CommandLine;
2525
import org.apache.commons.cli.CommandLineParser;
2626
import org.apache.commons.cli.Option;
27+
import org.apache.commons.cli.OptionGroup;
2728
import org.apache.commons.cli.ParseException;
2829
import org.apache.commons.cli.PosixParser;
2930
import org.apache.hadoop.conf.Configuration;
@@ -56,12 +57,14 @@ public class CCIndexWarcExport extends CCIndexExport {
5657

5758
@Override
5859
protected void addOptions() {
59-
options.getOption("query")
60-
.setDescription("SQL query to select rows. Note: the result is required to contain the columns `url', "
61-
+ "`warc_filename', `warc_record_offset' and `warc_record_length', make sure they're SELECTed.");
62-
options.addOption(new Option(null, "csv", true, "CSV file to load WARC records by filename, offset and length."
63-
+ "The CSV file must have column headers and the input columns `url', `warc_filename', "
64-
+ "`warc_record_offset' and `warc_record_length' are mandatory, see also option --query. "));
60+
Option query = options.getOption("query");
61+
query.setDescription("SQL query to select rows. Note: the result is required to contain the columns `url', "
62+
+ "`warc_filename', `warc_record_offset' and `warc_record_length', make sure they're SELECTed.");
63+
OptionGroup g = options.getOptionGroup(query).addOption(query).addOption(new Option(null, "csv", true,
64+
"CSV file to load WARC records by filename, offset and length. "
65+
+ "The CSV file must have column headers and the input columns `url', `warc_filename', "
66+
+ "`warc_record_offset' and `warc_record_length' are mandatory, see also option --query. "));
67+
options.addOptionGroup(g);
6568

6669
options.addOption(
6770
new Option(null, "numOutputPartitions", true, "repartition data to have <n> output partitions"));

0 commit comments

Comments
 (0)