|
24 | 24 | import org.apache.commons.cli.CommandLine; |
25 | 25 | import org.apache.commons.cli.CommandLineParser; |
26 | 26 | import org.apache.commons.cli.Option; |
| 27 | +import org.apache.commons.cli.OptionGroup; |
27 | 28 | import org.apache.commons.cli.ParseException; |
28 | 29 | import org.apache.commons.cli.PosixParser; |
29 | 30 | import org.apache.hadoop.conf.Configuration; |
@@ -56,12 +57,14 @@ public class CCIndexWarcExport extends CCIndexExport { |
56 | 57 |
|
57 | 58 | @Override |
58 | 59 | protected void addOptions() { |
59 | | - options.getOption("query") |
60 | | - .setDescription("SQL query to select rows. Note: the result is required to contain the columns `url', " |
61 | | - + "`warc_filename', `warc_record_offset' and `warc_record_length', make sure they're SELECTed."); |
62 | | - options.addOption(new Option(null, "csv", true, "CSV file to load WARC records by filename, offset and length." |
63 | | - + "The CSV file must have column headers and the input columns `url', `warc_filename', " |
64 | | - + "`warc_record_offset' and `warc_record_length' are mandatory, see also option --query. ")); |
| 60 | + Option query = options.getOption("query"); |
| 61 | + query.setDescription("SQL query to select rows. Note: the result is required to contain the columns `url', " |
| 62 | + + "`warc_filename', `warc_record_offset' and `warc_record_length', make sure they're SELECTed."); |
| 63 | + OptionGroup g = options.getOptionGroup(query).addOption(query).addOption(new Option(null, "csv", true, |
| 64 | + "CSV file to load WARC records by filename, offset and length. " |
| 65 | + + "The CSV file must have column headers and the input columns `url', `warc_filename', " |
| 66 | + + "`warc_record_offset' and `warc_record_length' are mandatory, see also option --query. ")); |
| 67 | + options.addOptionGroup(g); |
65 | 68 |
|
66 | 69 | options.addOption( |
67 | 70 | new Option(null, "numOutputPartitions", true, "repartition data to have <n> output partitions")); |
|
0 commit comments