diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
new file mode 100644
index 0000000..bdbf1f6
--- /dev/null
+++ b/.github/workflows/build.yml
@@ -0,0 +1,37 @@
+# CI build: runs the Maven build and Javadoc generation once per JDK in the matrix.
+name: cc-warc-examples build
+
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+    branches:
+      - master
+
+# The build only reads the repository; keep the GITHUB_TOKEN least-privileged.
+permissions:
+  contents: read
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        # One job is started per JDK release listed here.
+        java: [11, 17, 21]
+    name: Java ${{ matrix.java }}
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Setup JDK
+        uses: actions/setup-java@v5
+        with:
+          distribution: 'temurin'
+          java-version: ${{ matrix.java }}
+          # Reuse downloaded Maven dependencies between workflow runs.
+          cache: 'maven'
+
+      - name: Build
+        # --batch-mode runs Maven non-interactively, which suits unattended CI logs.
+        run: mvn --batch-mode verify javadoc:aggregate
diff --git a/.gitignore b/.gitignore
index c84686a..04bcd36 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,7 @@
 
 data/
 bin/
+/target/
+/.classpath
+/.project
+/.settings
diff --git a/README.md b/README.md
index b9591d8..74345fd 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-![Common Crawl Logo](http://commoncrawl.org/wp-content/uploads/2012/04/ccLogo.png)
+![Common Crawl Logo](https://avatars.githubusercontent.com/u/1194841?s=64)
 
 # Common Crawl WARC Examples
 
@@ -10,11 +10,26 @@ There are three examples for Hadoop processing:
 + [WAT files] Server response analysis using response metadata
 + [WET files] Classic word count example using extracted text
 
-All three assume initially that the files are stored locally but can be trivially modified to pull them down from Common Crawl's Amazon S3 bucket.
-To acquire the files, you can use [S3Cmd](http://s3tools.org/s3cmd) or similar.
+For development, you likely want to start with input files stored locally in the `data/` subdirectory. To acquire the files, you can use any HTTP client or (if you are on AWS) the [AWS CLI](https://aws.amazon.com/cli/).
 
-    s3cmd get s3://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2013-48/segments/1386163035819/warc/CC-MAIN-20131204131715-00000-ip-10-33-133-15.ec2.internal.warc.gz
+    mkdir data
+    cd data/
+    wget https://data.commoncrawl.org/crawl-data/CC-MAIN-2013-48/segments/1386163035819/warc/CC-MAIN-20131204131715-00000-ip-10-33-133-15.ec2.internal.warc.gz
+    wget https://data.commoncrawl.org/crawl-data/CC-MAIN-2013-48/segments/1386163035819/wet/CC-MAIN-20131204131715-00000-ip-10-33-133-15.ec2.internal.warc.wet.gz
 
+or, if you are on AWS:
+
+    mkdir data
+    aws s3 cp s3://commoncrawl/crawl-data/CC-MAIN-2013-48/segments/1386163035819/warc/CC-MAIN-20131204131715-00000-ip-10-33-133-15.ec2.internal.warc.gz data/
+    aws s3 cp s3://commoncrawl/crawl-data/CC-MAIN-2013-48/segments/1386163035819/wet/CC-MAIN-20131204131715-00000-ip-10-33-133-15.ec2.internal.warc.wet.gz data/
+
+To build and run in [Hadoop local or non-distributed mode](https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/SingleCluster.html#Standalone_Operation):
+
+    mvn package
+    <path-to-hadoop>/bin/hadoop jar target/cc-warc-examples-0.5-SNAPSHOT-jar-with-dependencies.jar org.commoncrawl.examples.mapreduce.WETWordCount -Dmapreduce.framework.name=local file:/tmp/cc/wet-word-count file:$PWD/data/*.wet.gz
+
+Note: all three examples require that you specify the output directory and all input files or directories.
+
 # License
 
 MIT License, as per `LICENSE`
diff --git a/eclipse-formatter.xml b/eclipse-formatter.xml
new file mode 100644
index 0000000..e9ac2f0
--- /dev/null
+++ b/eclipse-formatter.xml
@@ -0,0 +1,404 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<profiles version="23">
+	<profile kind="CodeFormatterProfile" name="ccf" version="23">
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_ellipsis" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_declarations" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_allocation_expression" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.parentheses_positions_in_for_statment" value="common_lines"/>
+		<setting id="org.eclipse.jdt.core.formatter.comment.new_lines_at_block_boundaries" value="true"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_parameters" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.comment.insert_new_line_for_parameter" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_package" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_enum_constant" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_while" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_annotation_type_member_declaration" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.comment.format_javadoc_comments" value="true"/>
+		<setting id="org.eclipse.jdt.core.formatter.indentation.size" value="4"/>
+		<setting id="org.eclipse.jdt.core.formatter.parentheses_positions_in_enum_constant_declaration" value="common_lines"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_for" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.align_with_spaces" value="false"/>
+		<setting id="org.eclipse.jdt.core.formatter.continuation_indentation" value="2"/>
+		<setting id="org.eclipse.jdt.core.formatter.number_of_blank_lines_before_code_block" value="0"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_switch_case_expressions" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.blank_lines_after_package" value="1"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_local_declarations" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_enum_constant" value="16"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_parameterized_type_reference" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.comment.indent_root_tags" value="false"/>
+		<setting id="org.eclipse.jdt.core.formatter.wrap_before_or_operator_multicatch" value="true"/>
+		<setting id="org.eclipse.jdt.core.formatter.enabling_tag" value="@formatter:on"/>
+		<setting id="org.eclipse.jdt.core.formatter.comment.count_line_length_from_starting_position" value="true"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_record_components" value="16"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_method_declaration" value="16"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_parameter" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.wrap_before_multiplicative_operator" value="true"/>
+		<setting id="org.eclipse.jdt.core.formatter.keep_then_statement_on_same_line" value="false"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_explicitconstructorcall_arguments" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_prefix_operator" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_brace_in_array_initializer" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_arguments" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_method" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_parameterized_type_references" value="0"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_logical_operator" value="16"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_parenthesized_expression" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.keep_annotation_declaration_on_one_line" value="one_line_never"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_record_declaration" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_enum_constant" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_multiplicative_operator" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_and_in_type_parameter" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_invocation" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_assignment_operator" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_type_declaration" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_for" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.comment.preserve_white_space_between_code_and_line_comments" value="false"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_local_variable" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_abstract_method" value="1"/>
+		<setting id="org.eclipse.jdt.core.formatter.keep_enum_constant_declaration_on_one_line" value="one_line_never"/>
+		<setting id="org.eclipse.jdt.core.formatter.align_variable_declarations_on_columns" value="false"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_invocation" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_union_type_in_multicatch" value="16"/>
+		<setting id="org.eclipse.jdt.core.formatter.number_of_blank_lines_at_beginning_of_method_body" value="0"/>
+		<setting id="org.eclipse.jdt.core.formatter.keep_else_statement_on_same_line" value="false"/>
+		<setting id="org.eclipse.jdt.core.formatter.parentheses_positions_in_catch_clause" value="common_lines"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_parameterized_type_reference" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_array_initializer" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_annotation" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_explicit_constructor_call" value="16"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_multiplicative_operator" value="16"/>
+		<setting id="org.eclipse.jdt.core.formatter.keep_anonymous_type_declaration_on_one_line" value="one_line_never"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_switch_case_expressions" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.wrap_before_shift_operator" value="true"/>
+		<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_annotation_declaration_header" value="true"/>
+		<setting id="org.eclipse.jdt.core.formatter.brace_position_for_block" value="end_of_line"/>
+		<setting id="org.eclipse.jdt.core.formatter.number_of_blank_lines_at_end_of_code_block" value="0"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_bitwise_operator" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.put_empty_statement_on_new_line" value="true"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_parameters_in_constructor_declaration" value="16"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_type_parameters" value="0"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_compact_loops" value="16"/>
+		<setting id="org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_block_comment" value="false"/>
+		<setting id="org.eclipse.jdt.core.formatter.keep_simple_for_body_on_same_line" value="false"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_new_line_at_end_of_file_if_missing" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.wrap_before_switch_case_arrow_operator" value="false"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_array_initializer" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_unary_operator" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.format_line_comment_starting_on_first_column" value="false"/>
+		<setting id="org.eclipse.jdt.core.formatter.parentheses_positions_in_annotation" value="common_lines"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_ellipsis" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_try_resources" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_assert" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_annotations_on_enum_constant" value="49"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_and_in_type_parameter" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_parenthesized_expression" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.text_block_indentation" value="0"/>
+		<setting id="org.eclipse.jdt.core.formatter.align_type_members_on_columns" value="false"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_assignment" value="0"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_module_statements" value="16"/>
+		<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_type_header" value="true"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_declaration" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.comment.align_tags_names_descriptions" value="false"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_enum_constant" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.keep_if_then_body_block_on_one_line" value="one_line_never"/>
+		<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_first_class_body_declaration" value="0"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_closing_brace_in_array_initializer" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_parameters" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.format_guardian_clause_on_one_line" value="false"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_if" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.align_assignment_statements_on_columns" value="false"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_permitted_types" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.brace_position_for_block_in_case" value="end_of_line"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_constructor_declaration" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_conditional_expression_chain" value="0"/>
+		<setting id="org.eclipse.jdt.core.formatter.comment.format_header" value="false"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_type_annotations" value="0"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression" value="16"/>
+		<setting id="org.eclipse.jdt.core.formatter.wrap_before_assertion_message_operator" value="true"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_switch" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_method_declaration" value="0"/>
+		<setting id="org.eclipse.jdt.core.formatter.align_fields_grouping_blank_lines" value="2147483647"/>
+		<setting id="org.eclipse.jdt.core.formatter.comment.new_lines_at_javadoc_boundaries" value="true"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_bitwise_operator" value="16"/>
+		<setting id="org.eclipse.jdt.core.formatter.brace_position_for_annotation_type_declaration" value="end_of_line"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_for" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_resources_in_try" value="80"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_selector_in_method_invocation" value="80"/>
+		<setting id="org.eclipse.jdt.core.formatter.never_indent_block_comments_on_first_column" value="false"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_synchronized" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_allocation_expression" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.comment.format_source_code" value="true"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_array_initializer" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_field" value="0"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_method" value="1"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_superclass_in_type_declaration" value="16"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_throw" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_not_operator" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_type_annotation" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_brace_in_array_initializer" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_parenthesized_expression" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.comment.format_html" value="true"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation_type_declaration" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.parentheses_positions_in_method_delcaration" value="common_lines"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_compact_if" value="16"/>
+		<setting id="org.eclipse.jdt.core.formatter.indent_empty_lines" value="false"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_type_arguments" value="0"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_unary_operator" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_enum_constant" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_annotation" value="0"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_declarations" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_annotations_on_package" value="49"/>
+		<setting id="org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_switch" value="false"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_else_in_if_statement" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_assignment_operator" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_label" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_declaration_header" value="true"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_conditional" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_parameters" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_cast" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_arrow_in_switch_case" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_while_in_do_statement" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_type_reference" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_permitted_types_in_type_declaration" value="16"/>
+		<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_record_header" value="true"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_parameterized_type_reference" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_opening_brace_in_array_initializer" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.indent_breaks_compare_to_cases" value="true"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_declaration" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.wrap_before_bitwise_operator" value="true"/>
+		<setting id="org.eclipse.jdt.core.formatter.comment.javadoc_do_not_separate_block_tags" value="false"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_try" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_lambda_arrow" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_declaration" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.comment.indent_tag_description" value="false"/>
+		<setting id="org.eclipse.jdt.core.formatter.keep_imple_if_on_one_line" value="false"/>
+		<setting id="org.eclipse.jdt.core.formatter.brace_position_for_record_constructor" value="end_of_line"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_declaration" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_between_brackets_in_array_type_reference" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_parameters" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_string_concatenation" value="16"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_for" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_allocation_expression" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_multiple_fields" value="16"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_constant_arguments" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_prefix_operator" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.brace_position_for_array_initializer" value="end_of_line"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_shift_operator" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_method_declaration" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_parameters" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_catch" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_shift_operator" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_braces_in_array_initializer" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_local_declarations" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.keep_simple_do_while_body_on_same_line" value="false"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_annotation_type_declaration" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_record_components" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.wrap_outer_expressions_when_nested" value="true"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_paren_in_cast" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_synchronized" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation_type_member_declaration" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_expressions_in_for_loop_header" value="0"/>
+		<setting id="org.eclipse.jdt.core.formatter.wrap_before_additive_operator" value="true"/>
+		<setting id="org.eclipse.jdt.core.formatter.keep_simple_getter_setter_on_one_line" value="false"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_while" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.brace_position_for_block_in_case_after_arrow" value="end_of_line"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_parameters" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_string_concatenation" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_lambda_arrow" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.join_lines_in_comments" value="true"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_record_declaration" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_relational_operator" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_field_declarations" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.blank_lines_between_import_groups" value="1"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_at_in_annotation_type_declaration" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_logical_operator" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.parentheses_positions_in_method_invocation" value="common_lines"/>
+		<setting id="org.eclipse.jdt.core.formatter.blank_lines_after_imports" value="1"/>
+		<setting id="org.eclipse.jdt.core.formatter.comment.insert_new_line_before_root_tags" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.parentheses_positions_in_record_declaration" value="common_lines"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_throws" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.parentheses_positions_in_switch_statement" value="common_lines"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_postfix_operator" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_increments" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_arguments" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_arrow_in_switch_default" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_inits" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.disabling_tag" value="@formatter:off"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_enum_constants" value="16"/>
+		<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_imports" value="1"/>
+		<setting id="org.eclipse.jdt.core.formatter.number_of_blank_lines_at_end_of_method_body" value="0"/>
+		<setting id="org.eclipse.jdt.core.formatter.parentheses_positions_in_if_while_statement" value="common_lines"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_brace_in_block" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_return" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_arrow_in_switch_case" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_field" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.blank_lines_between_type_declarations" value="1"/>
+		<setting id="org.eclipse.jdt.core.formatter.keep_switch_body_block_on_one_line" value="one_line_never"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_for" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_catch" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_switch" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_anonymous_type_declaration" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.never_indent_line_comments_on_first_column" value="false"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_inits" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.indent_statements_compare_to_block" value="true"/>
+		<setting id="org.eclipse.jdt.core.formatter.brace_position_for_anonymous_type_declaration" value="end_of_line"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_question_in_wildcard" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_invocation_arguments" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_expressions_in_switch_case_with_arrow" value="16"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_switch" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.comment.align_tags_descriptions_grouped" value="true"/>
+		<setting id="org.eclipse.jdt.core.formatter.comment.line_length" value="96"/>
+		<setting id="org.eclipse.jdt.core.formatter.use_on_off_tags" value="true"/>
+		<setting id="org.eclipse.jdt.core.formatter.keep_method_body_on_one_line" value="one_line_never"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_brackets_in_array_allocation_expression" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.keep_loop_body_block_on_one_line" value="one_line_never"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_constant" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.brace_position_for_method_declaration" value="end_of_line"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_for" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.keep_type_declaration_on_one_line" value="one_line_never"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_arguments" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_additive_operator" value="16"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_field_declarations" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_record_constructor" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_relational_operator" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_superinterfaces" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.keep_record_declaration_on_one_line" value="one_line_never"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_default" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_question_in_conditional" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.brace_position_for_constructor_declaration" value="end_of_line"/>
+		<setting id="org.eclipse.jdt.core.formatter.brace_position_for_lambda_body" value="end_of_line"/>
+		<setting id="org.eclipse.jdt.core.formatter.compact_else_if" value="true"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_parameters" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_catch" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_invocation" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_invocation_arguments" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_method_invocation" value="48"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_constructor_declaration" value="16"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_catch_in_try_statement" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_try" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_annotations_on_parameter" value="0"/>
+		<setting id="org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_javadoc_comment" value="false"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_relational_operator" value="0"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_expressions_in_array_initializer" value="16"/>
+		<setting id="org.eclipse.jdt.core.formatter.number_of_empty_lines_to_preserve" value="1"/>
+		<setting id="org.eclipse.jdt.core.formatter.align_arrows_in_switch_on_columns" value="false"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_case" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_additive_operator" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_if" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_arguments" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_string_concatenation" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.comment.format_line_comments" value="true"/>
+		<setting id="org.eclipse.jdt.core.formatter.align_selector_in_method_invocation_on_expression_first_line" value="true"/>
+		<setting id="org.eclipse.jdt.core.formatter.brace_position_for_record_declaration" value="end_of_line"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_labeled_statement" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.keep_switch_case_with_arrow_on_one_line" value="one_line_never"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_expressions_in_switch_case_with_colon" value="16"/>
+		<setting id="org.eclipse.jdt.core.formatter.number_of_blank_lines_after_code_block" value="0"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_type_declaration" value="16"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_conditional_expression" value="80"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_type" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_annotations_on_type" value="49"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_block" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_annotations_on_local_variable" value="49"/>
+		<setting id="org.eclipse.jdt.core.formatter.brace_position_for_enum_declaration" value="end_of_line"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_arrow_in_switch_default" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.comment.insert_new_line_between_different_tags" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_additive_operator" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_invocation" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_while" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.join_wrapped_lines" value="true"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_constructor_declaration" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_annotations_on_field" value="49"/>
+		<setting id="org.eclipse.jdt.core.formatter.wrap_before_conditional_operator" value="true"/>
+		<setting id="org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_cases" value="true"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_allocation_expression" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.join_line_comments" value="false"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_synchronized" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_shift_operator" value="0"/>
+		<setting id="org.eclipse.jdt.core.formatter.use_tabs_only_for_leading_indentations" value="false"/>
+		<setting id="org.eclipse.jdt.core.formatter.parentheses_positions_in_try_clause" value="common_lines"/>
+		<setting id="org.eclipse.jdt.core.formatter.keep_code_block_on_one_line" value="one_line_never"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_throws" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_record_components" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.tabulation.size" value="4"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_bitwise_operator" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_reference" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_conditional" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_try" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_try_resources" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.continuation_indentation_for_array_initializer" value="2"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_question_in_wildcard" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_record_declaration" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_enum_declaration" value="16"/>
+		<setting id="org.eclipse.jdt.core.formatter.wrap_before_assignment_operator" value="false"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_labeled_statement" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.brace_position_for_switch" value="end_of_line"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_superinterfaces" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_parameters" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_parameters" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_switch_case_with_arrow" value="20"/>
+		<setting id="org.eclipse.jdt.core.formatter.keep_lambda_body_block_on_one_line" value="one_line_never"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_annotations_on_method" value="49"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_parameterized_type_reference" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.keep_record_constructor_on_one_line" value="one_line_never"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_record_declaration" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.keep_empty_array_initializer_on_one_line" value="false"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_assertion_message" value="16"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_constructor_declaration" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_new_chunk" value="1"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_allocation_expression" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_constructor_declaration" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_parameterized_type_reference" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_arguments" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_assert" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_member_type" value="1"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_logical_operator" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_qualified_allocation_expression" value="16"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_record_declaration" value="16"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_if" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_semicolon" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.wrap_before_relational_operator" value="true"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_postfix_operator" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_arguments" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_cast" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.comment.format_block_comments" value="true"/>
+		<setting id="org.eclipse.jdt.core.formatter.alignment_for_parameters_in_method_declaration" value="48"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_throws" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.blank_lines_after_last_class_body_declaration" value="0"/>
+		<setting id="org.eclipse.jdt.core.formatter.indent_statements_compare_to_body" value="true"/>
+		<setting id="org.eclipse.jdt.core.formatter.keep_simple_while_body_on_same_line" value="false"/>
+		<setting id="org.eclipse.jdt.core.formatter.wrap_before_logical_operator" value="true"/>
+		<setting id="org.eclipse.jdt.core.formatter.blank_lines_between_statement_group_in_switch" value="0"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_reference" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_annotation" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_constant_arguments" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.parentheses_positions_in_lambda_declaration" value="common_lines"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_case" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_permitted_types" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_reference" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.keep_enum_declaration_on_one_line" value="one_line_never"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_declaration" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.brace_position_for_enum_constant" value="end_of_line"/>
+		<setting id="org.eclipse.jdt.core.formatter.brace_position_for_type_declaration" value="end_of_line"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_multiplicative_operator" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_package" value="0"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_for" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_increments" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_enum_constant" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_explicitconstructorcall_arguments" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_annotation" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_constant_header" value="true"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_constructor_declaration" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_throws" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_parameters" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_question_in_conditional" value="insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.comment.indent_parameter_description" value="false"/>
+		<setting id="org.eclipse.jdt.core.formatter.number_of_blank_lines_at_beginning_of_code_block" value="0"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_finally_in_try_statement" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.tabulation.char" value="tab"/>
+		<setting id="org.eclipse.jdt.core.formatter.wrap_before_string_concatenation" value="true"/>
+		<setting id="org.eclipse.jdt.core.formatter.lineSplit" value="120"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_annotation" value="do not insert"/>
+		<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_switch" value="insert"/>
+	</profile>
+</profiles>
diff --git a/lib/webarchive-commons-jar-with-dependencies.jar b/lib/webarchive-commons-jar-with-dependencies.jar
deleted file mode 100644
index f1766aa..0000000
Binary files a/lib/webarchive-commons-jar-with-dependencies.jar and /dev/null differ
diff --git a/pom.xml b/pom.xml
index 97fdf14..a40d4dc 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1,207 +1,213 @@
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-  <modelVersion>4.0.0</modelVersion>
+<?xml version="1.0" encoding="UTF-8"?>
+<project
+		xmlns="http://maven.apache.org/POM/4.0.0"
+		xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+		xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+	<modelVersion>4.0.0</modelVersion>
 
-  <groupId>org.commoncrawl</groupId>
-  <artifactId>cc-warc-examples</artifactId>
-  <version>0.1-SNAPSHOT</version>
-  <packaging>jar</packaging>  
+	<groupId>org.commoncrawl</groupId>
+	<artifactId>cc-warc-examples</artifactId>
+	<version>0.6-SNAPSHOT</version>
+	<packaging>jar</packaging>
 
-  <name>cc-warc-examples</name>
-  <description>
-    Common Crawl WARC Examples.
-    Contains both wrappers for processing WARC files in Hadoop MapReduce jobs and Hadoop examples to get you started.
-  </description>
-  <url>https://github.com/Smerity/cc-warc-examples</url>
+	<name>cc-warc-examples</name>
+	<description>Common Crawl WARC Examples.
+    Contains both wrappers for processing WARC files in Hadoop MapReduce jobs and Hadoop examples to get you started.</description>
+	<url>https://github.com/commoncrawl/cc-warc-examples</url>
 
-  <licenses>
-    <license>
-      <name>The MIT License</name>
-      <url>http://www.opensource.org/licenses/mit-license.php</url>
-      <distribution>repo</distribution>
-    </license>
-  </licenses>
+	<licenses>
+		<license>
+			<name>The MIT License</name>
+			<url>http://www.opensource.org/licenses/mit-license.php</url>
+			<distribution>repo</distribution>
+		</license>
+	</licenses>
 
-  <scm>    
-    <connection>scm:git:git@github.com:Smerity/cc-warc-examples.git</connection>
-    <url>git@github.com:Smerity/cc-warc-examples.git</url>
-  </scm>
-  
-  <properties>
-    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
-    <build.time>${maven.build.timestamp}</build.time>
-    <maven.build.timestamp.format>yyyyMMddhhmmss</maven.build.timestamp.format>
-    <build.tag />
-    <!-- sonatype repositories are defaults for distributionManagement -->
-    <repository.id>sonatype-nexus-staging</repository.id>
-    <repository.url>https://oss.sonatype.org/service/local/staging/deploy/maven2/</repository.url>
-    <snapshotRepository.id>sonatype-nexus-snapshots</snapshotRepository.id>
-    <snapshotRepository.url>https://oss.sonatype.org/content/repositories/snapshots/</snapshotRepository.url>
-  </properties>
+	<scm>
+		<connection>scm:git:git@github.com:commoncrawl/cc-warc-examples.git</connection>
+		<url>git@github.com:commoncrawl/cc-warc-examples.git</url>
+	</scm>
 
+	<properties>
+		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+		<build.time>${maven.build.timestamp}</build.time>
+		<maven.build.timestamp.format>yyyyMMddHHmmss</maven.build.timestamp.format><!-- HH: 24-hour clock, so AM and PM builds get distinct timestamps -->
+		<build.tag/>
+		<!-- sonatype repositories are defaults for distributionManagement -->
+		<repository.id>sonatype-nexus-staging</repository.id>
+		<repository.url>https://oss.sonatype.org/service/local/staging/deploy/maven2/</repository.url>
+		<snapshotRepository.id>sonatype-nexus-snapshots</snapshotRepository.id>
+		<snapshotRepository.url>https://oss.sonatype.org/content/repositories/snapshots/</snapshotRepository.url>
+	</properties>
 
-  <dependencies>
-    <dependency>
-      <groupId>log4j</groupId>
-      <artifactId>log4j</artifactId>
-      <version>1.2.17</version>
-    </dependency>
+	<dependencies>
+		<dependency>
+			<groupId>log4j</groupId>
+			<artifactId>log4j</artifactId>
+			<version>1.2.17</version>
+		</dependency>
 
-    <dependency>
-      <groupId>commons-io</groupId>
-      <artifactId>commons-io</artifactId>
-      <version>2.4</version>
-    </dependency>
+		<dependency>
+			<groupId>commons-io</groupId>
+			<artifactId>commons-io</artifactId>
+			<version>2.11.0</version>
+		</dependency>
 
-    <dependency>
-      <groupId>org.netpreserve.commons</groupId>
-      <artifactId>webarchive-commons</artifactId>
-      <version>1.1.2</version>
-    </dependency>
-    
-    <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-core</artifactId>
-      <version>0.20.2-cdh3u4</version>
-    </dependency>
-  </dependencies>
+		<dependency>
+			<groupId>org.netpreserve.commons</groupId>
+			<artifactId>webarchive-commons</artifactId>
+			<version>3.0.2</version>
+			<exclusions>
+				<exclusion>
+					<groupId>org.apache.hadoop</groupId>
+					<artifactId>hadoop-core</artifactId>
+				</exclusion>
+			</exclusions>
+		</dependency>
 
-  <build>
-    <sourceDirectory>src</sourceDirectory>
-    
-    <plugins>
-      <plugin>
-	<groupId>org.apache.maven.plugins</groupId>
-	<artifactId>maven-compiler-plugin</artifactId>
-	<version>2.3.2</version>
-	<configuration>
-	  <source>1.5</source>
-	  <target>1.5</target>
-	</configuration>
-      </plugin>
-      <plugin>
-        <artifactId>maven-assembly-plugin</artifactId>
-        <version>2.4</version>
-        <configuration>
-          <descriptorRefs>
-            <descriptorRef>jar-with-dependencies</descriptorRef>
-          </descriptorRefs>
-          <finalName>cc-warc-examples-${project.version}</finalName>
-        </configuration>
-        <executions>
-          <execution>
-            <phase>package</phase>
-            <goals>
-              <goal>single</goal>
-            </goals>
-          </execution>
-        </executions>
-      </plugin>
-      <plugin>
-        <artifactId>maven-javadoc-plugin</artifactId>
-        <version>2.7</version>
-        <configuration>
-          <docfilessubdirs>true</docfilessubdirs>
-          <excludedocfilessubdir>.svn</excludedocfilessubdir>
-          <encoding>UTF-8</encoding>
-          <docEncoding>UTF-8</docEncoding>
-        </configuration>
-        <executions>
-          <execution>
-            <goals>
-              <goal>jar</goal>
-              <goal>javadoc</goal>
-            </goals>
-          </execution>
-          <execution>
-            <id>site</id>
-	    <phase>pre-site</phase>
-            <goals>
-              <goal>javadoc</goal>
-            </goals>
-          </execution>
-        </executions>
-      </plugin>
-      <plugin>
-        <artifactId>maven-source-plugin</artifactId>
-        <version>2.1.1</version>
-        <executions>
-          <execution>
-            <goals>
-              <goal>jar</goal>
-            </goals>
-          </execution>
-        </executions>
-      </plugin>
-      <plugin>
-	<groupId>org.apache.maven.plugins</groupId>
-	<artifactId>maven-release-plugin</artifactId>
-	<version>2.2.2</version>
-      </plugin>
-    </plugins>
-  </build>
+		<dependency>
+			<groupId>org.apache.hadoop</groupId>
+			<artifactId>hadoop-client</artifactId>
+			<version>3.3.6</version>
+			<scope>provided</scope>
+		</dependency>
+		<dependency>
+			<groupId>net.java.dev.jets3t</groupId>
+			<artifactId>jets3t</artifactId>
+			<version>0.9.4</version>
+		</dependency>
+	</dependencies>
 
-  <profiles>
-    <profile>
-      <id>release-sign-artifacts</id>
-      <activation>
-	<property>
-	  <name>performRelease</name>
-	  <value>true</value>
-	</property>
-      </activation>
-      <build>
-	<plugins>
-	  <plugin>
-	    <groupId>org.apache.maven.plugins</groupId>
-	    <artifactId>maven-gpg-plugin</artifactId>
-	    <version>1.4</version>
-	    <configuration>
-	      <passphrase>${gpg.passphrase}</passphrase>
-	    </configuration>
-	    <executions>
-	      <execution>
-		<id>sign-artifacts</id>
-		<phase>verify</phase>
-		<goals>
-		  <goal>sign</goal>
-		</goals>
-	      </execution>
-	    </executions>
-	  </plugin>
-	</plugins>
-      </build>
-    </profile>
-  </profiles>
+	<build>
 
-  <repositories>
-    <repository>
-      <id>cloudera</id>
-      <name>Cloudera Hadoop</name>
-      <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
-      <layout>default</layout>
+		<plugins>
+			<plugin>
+				<groupId>org.apache.maven.plugins</groupId>
+				<artifactId>maven-compiler-plugin</artifactId>
+				<version>3.15.0</version>
+				<configuration>
+					<source>11</source>
+					<target>11</target>
+				</configuration>
+			</plugin>
+			<plugin>
+				<artifactId>maven-assembly-plugin</artifactId>
+				<version>3.8.0</version>
+				<configuration>
+					<descriptorRefs>
+						<descriptorRef>jar-with-dependencies</descriptorRef>
+					</descriptorRefs>
+					<finalName>cc-warc-examples-${project.version}</finalName>
+				</configuration>
+				<executions>
+					<execution>
+						<goals>
+							<goal>single</goal>
+						</goals>
+						<phase>package</phase>
+					</execution>
+				</executions>
+			</plugin>
+			<plugin>
+				<artifactId>maven-javadoc-plugin</artifactId>
+				<version>3.12.0</version>
+				<configuration>
+					<docfilessubdirs>true</docfilessubdirs>
+					<excludedocfilessubdir>.svn</excludedocfilessubdir>
+					<encoding>UTF-8</encoding>
+				</configuration>
+				<executions>
+					<execution>
+						<goals>
+							<goal>jar</goal>
+							<goal>javadoc</goal>
+						</goals>
+					</execution>
+					<execution>
+						<id>site</id>
+						<goals>
+							<goal>javadoc</goal>
+						</goals>
+						<phase>pre-site</phase>
+					</execution>
+				</executions>
+			</plugin>
+			<plugin>
+				<artifactId>maven-source-plugin</artifactId>
+				<version>3.4.0</version>
+				<executions>
+					<execution>
+						<goals>
+							<goal>jar</goal>
+						</goals>
+					</execution>
+				</executions>
+			</plugin>
+			<plugin>
+				<groupId>org.apache.maven.plugins</groupId>
+				<artifactId>maven-release-plugin</artifactId>
+				<version>3.3.1</version>
+			</plugin>
+			<plugin>
+				<groupId>com.diffplug.spotless</groupId>
+				<artifactId>spotless-maven-plugin</artifactId>
+				<version>2.46.1</version>
+				<configuration>
+					<pom>
+						<!-- These are the defaults, you can override if you want -->
+						<includes>
+							<include>pom.xml</include>
+						</includes>
+						<sortPom>
+							<indentAttribute>all</indentAttribute>
+							<keepBlankLines>true</keepBlankLines>
+							<expandEmptyElements>false</expandEmptyElements>
+							<nrOfIndentSpace>-1</nrOfIndentSpace>
+							<predefinedSortOrder>recommended_2008_06</predefinedSortOrder>
+						</sortPom>
+					</pom>
+					<java>
+						<eclipse>
+							<file>${project.basedir}/eclipse-formatter.xml</file>
+						</eclipse>
+					</java>
+				</configuration>
+			</plugin>
+		</plugins>
+		<sourceDirectory>src</sourceDirectory>
+	</build>
+
+	<profiles>
+		<profile>
+			<id>release-sign-artifacts</id>
+			<activation>
+				<property>
+					<name>performRelease</name>
+					<value>true</value>
+				</property>
+			</activation>
+			<build>
+				<plugins>
+					<plugin>
+						<groupId>org.apache.maven.plugins</groupId>
+						<artifactId>maven-gpg-plugin</artifactId>
+						<version>3.2.8</version>
+						<configuration>
+							<passphrase>${gpg.passphrase}</passphrase>
+						</configuration>
+						<executions>
+							<execution>
+								<id>sign-artifacts</id>
+								<goals>
+									<goal>sign</goal>
+								</goals>
+								<phase>verify</phase>
+							</execution>
+						</executions>
+					</plugin>
+				</plugins>
+			</build>
+		</profile>
+	</profiles>
 
-      <releases>
-        <enabled>true</enabled>
-        <updatePolicy>daily</updatePolicy>
-        <checksumPolicy>warn</checksumPolicy>
-      </releases>
-      <snapshots>
-        <enabled>true</enabled>
-        <updatePolicy>daily</updatePolicy>
-        <checksumPolicy>warn</checksumPolicy>
-      </snapshots>
-    </repository>
-  </repositories>
-  
-  <distributionManagement>
-    <repository>
-      <id>${repository.id}</id>
-      <url>${repository.url}</url>
-    </repository>
-    <snapshotRepository>
-      <id>${snapshotRepository.id}</id>
-      <url>${snapshotRepository.url}</url>
-    </snapshotRepository>
-  </distributionManagement>
-  
 </project>
diff --git a/src/org/commoncrawl/examples/S3ReaderTest.java b/src/org/commoncrawl/examples/S3ReaderTest.java
index fefa4ce..1355573 100644
--- a/src/org/commoncrawl/examples/S3ReaderTest.java
+++ b/src/org/commoncrawl/examples/S3ReaderTest.java
@@ -1,54 +1,56 @@
 package org.commoncrawl.examples;
+
 import java.io.IOException;
 
 import org.archive.io.ArchiveReader;
 import org.archive.io.ArchiveRecord;
 import org.archive.io.warc.WARCReaderFactory;
 import org.jets3t.service.S3Service;
-import org.jets3t.service.S3ServiceException;
+import org.jets3t.service.ServiceException;
 import org.jets3t.service.impl.rest.httpclient.RestS3Service;
 import org.jets3t.service.model.S3Object;
 
 /**
- * This is a raw example of how you can retrieve a file from the
- * Common Crawl S3 bucket without credentials using JetS3t.
+ * This is a raw example of how you can retrieve a file from the Common Crawl S3 bucket without
+ * credentials using JetS3t.
  *
  * @author Stephen Merity (Smerity)
  */
 public class S3ReaderTest {
-	public static void main(String[] args) throws IOException, S3ServiceException {
+	public static void main(String[] args) throws IOException, ServiceException {
 		// We're accessing a publicly available bucket so don't need to fill in our credentials
 		S3Service s3s = new RestS3Service(null);
-		
+
 		// Let's grab a file out of the CommonCrawl S3 bucket
-		String fn = "common-crawl/crawl-data/CC-MAIN-2013-48/segments/1386163035819/warc/CC-MAIN-20131204131715-00000-ip-10-33-133-15.ec2.internal.warc.gz";
-		S3Object f = s3s.getObject("aws-publicdatasets", fn, null, null, null, null, null, null);
-		
+		String fn = "crawl-data/CC-MAIN-2013-48/segments/1386163035819/warc/CC-MAIN-20131204131715-00000-ip-10-33-133-15.ec2.internal.warc.gz";
+		S3Object f = s3s.getObject("commoncrawl", fn, null, null, null, null, null, null);
+
 		// The file name identifies the ArchiveReader and indicates if it should be decompressed
 		ArchiveReader ar = WARCReaderFactory.get(fn, f.getDataInputStream(), true);
-		
+
 		// Once we have an ArchiveReader, we can work through each of the records it contains
 		int i = 0;
-		for(ArchiveRecord r : ar) {
+		for (ArchiveRecord r : ar) {
 			// The header file contains information such as the type of record, size, creation time, and URL
 			System.out.println("Header: " + r.getHeader());
 			System.out.println("URL: " + r.getHeader().getUrl());
 			System.out.println();
-			
+
 			// If we want to read the contents of the record, we can use the ArchiveRecord as an InputStream
-			// Create a byte array that is as long as all the record's stated length
+			// Create a byte array that is as long as the record's stated length and fill it completely
 			byte[] rawData = new byte[r.available()];
-			r.read(rawData);
+			r.readNBytes(rawData, 0, rawData.length); // a single read() may return fewer bytes
 			// Note: potential optimization would be to have a large buffer only allocated once
-			
+
 			// Why don't we convert it to a string and print the start of it? Let's hope it's text!
 			String content = new String(rawData);
 			System.out.println(content.substring(0, Math.min(500, content.length())));
 			System.out.println((content.length() > 500 ? "..." : ""));
-			
-			// Pretty printing to make the output more readable 
+
+			// Pretty printing to make the output more readable
 			System.out.println("=-=-=-=-=-=-=-=-=");
-			if (i++ > 4) break; 
+			if (i++ > 4)
+				break;
 		}
 	}
 }
\ No newline at end of file
diff --git a/src/org/commoncrawl/examples/WARCReaderTest.java b/src/org/commoncrawl/examples/WARCReaderTest.java
index ea18f6a..972e9ea 100644
--- a/src/org/commoncrawl/examples/WARCReaderTest.java
+++ b/src/org/commoncrawl/examples/WARCReaderTest.java
@@ -1,4 +1,5 @@
 package org.commoncrawl.examples;
+
 import java.io.FileInputStream;
 import java.io.IOException;
 
@@ -8,43 +9,44 @@
 import org.archive.io.warc.WARCReaderFactory;
 
 /**
- * A raw example of how to process a WARC file using the org.archive.io package.
- * Common Crawl S3 bucket without credentials using JetS3t.
+ * A raw example of how to process a WARC file using the org.archive.io package. Reads a local,
+ * gzip-compressed WARC file from the data/ directory; no S3 access is involved.
  *
  * @author Stephen Merity (Smerity)
  */
 public class WARCReaderTest {
 	/**
 	 * @param args
-	 * @throws IOException 
+	 * @throws IOException
 	 */
 	public static void main(String[] args) throws IOException {
-		// Set up a local compressed WARC file for reading 
+		// Set up a local compressed WARC file for reading
 		String fn = "data/CC-MAIN-20131204131715-00000-ip-10-33-133-15.ec2.internal.warc.gz";
 		FileInputStream is = new FileInputStream(fn);
 		// The file name identifies the ArchiveReader and indicates if it should be decompressed
 		ArchiveReader ar = WARCReaderFactory.get(fn, is, true);
-		
+
 		// Once we have an ArchiveReader, we can work through each of the records it contains
 		int i = 0;
-		for(ArchiveRecord r : ar) {
+		for (ArchiveRecord r : ar) {
 			// The header file contains information such as the type of record, size, creation time, and URL
 			System.out.println(r.getHeader());
 			System.out.println(r.getHeader().getUrl());
 			System.out.println();
-			
+
 			// If we want to read the contents of the record, we can use the ArchiveRecord as an InputStream
 			// Create a byte array that is as long as the record's stated length
 			byte[] rawData = IOUtils.toByteArray(r, r.available());
-			
+
 			// Why don't we convert it to a string and print the start of it? Let's hope it's text!
 			String content = new String(rawData);
 			System.out.println(content.substring(0, Math.min(500, content.length())));
 			System.out.println((content.length() > 500 ? "..." : ""));
-			
-			// Pretty printing to make the output more readable 
+
+			// Pretty printing to make the output more readable
 			System.out.println("=-=-=-=-=-=-=-=-=");
-			if (i++ > 4) break; 
+			if (i++ > 4)
+				break;
 		}
 	}
 }
\ No newline at end of file
diff --git a/src/org/commoncrawl/examples/mapreduce/ServerTypeMap.java b/src/org/commoncrawl/examples/mapreduce/ServerTypeMap.java
index 8009877..c290150 100644
--- a/src/org/commoncrawl/examples/mapreduce/ServerTypeMap.java
+++ b/src/org/commoncrawl/examples/mapreduce/ServerTypeMap.java
@@ -9,15 +9,17 @@
 import org.apache.log4j.Logger;
 import org.archive.io.ArchiveReader;
 import org.archive.io.ArchiveRecord;
+import org.json.JSONArray;
 import org.json.JSONException;
 import org.json.JSONObject;
 
 public class ServerTypeMap {
 	private static final Logger LOG = Logger.getLogger(ServerTypeMap.class);
+
 	protected static enum MAPPERCOUNTER {
-		RECORDS_IN,
-		NO_SERVER,
-		EXCEPTIONS
+		RECORDS_IN, //
+		NO_SERVER, //
+		EXCEPTIONS //
 	}
 
 	protected static class ServerMapper extends Mapper<Text, ArchiveReader, Text, LongWritable> {
@@ -38,15 +40,36 @@ public void map(Text key, ArchiveReader value, Context context) throws IOExcepti
 					String content = new String(rawData);
 					JSONObject json = new JSONObject(content);
 					try {
-						String server = json.getJSONObject("Envelope").getJSONObject("Payload-Metadata").getJSONObject("HTTP-Response-Metadata").getJSONObject("Headers").getString("Server");
-						outKey.set(server);
-						context.write(outKey, outVal);
+						String warcType = json.getJSONObject("Envelope")
+								.getJSONObject("WARC-Header-Metadata")
+								.getString("WARC-Type");
+						if (!warcType.equals("response")) {
+							continue;
+						}
+						JSONObject httpHeaders = json.getJSONObject("Envelope")
+								.getJSONObject("Payload-Metadata")
+								.getJSONObject("HTTP-Response-Metadata")
+								.getJSONObject("Headers");
+						JSONArray httpHeaderNames = httpHeaders.names();
+						for (int i = 0, l = httpHeaders.length(); i < l; i++) {
+							String headerName = httpHeaderNames.getString(i);
+							if (headerName.equalsIgnoreCase("server")) {
+								Object headerValue = httpHeaders.get(headerName);
+								if (headerValue instanceof JSONArray) {
+									for (int j = 0, L = ((JSONArray) headerValue).length(); j < L; j++) {
+										outKey.set(((JSONArray) headerValue).getString(j));
+										context.write(outKey, outVal);
+									}
+								} else {
+									outKey.set(headerValue.toString());
+									context.write(outKey, outVal);
+								}
+							}
+						}
 					} catch (JSONException ex) {
-						// If we reach here, the JSON object didn't have the header we were looking for
-						// There are likely better ways to check for json["Envelope"]["Payload-Metadata"][...] but this is concise
+						LOG.error("Failed to get HTTP header \"Server\" for " + r.getHeader().getUrl(), ex);
 					}
-				}
-				catch (Exception ex) {
+				} catch (Exception ex) {
 					LOG.error("Caught Exception", ex);
 					context.getCounter(MAPPERCOUNTER.EXCEPTIONS).increment(1);
 				}
diff --git a/src/org/commoncrawl/examples/mapreduce/TagCounterMap.java b/src/org/commoncrawl/examples/mapreduce/TagCounterMap.java
index c545259..cc5c663 100644
--- a/src/org/commoncrawl/examples/mapreduce/TagCounterMap.java
+++ b/src/org/commoncrawl/examples/mapreduce/TagCounterMap.java
@@ -1,6 +1,7 @@
 package org.commoncrawl.examples.mapreduce;
 
 import java.io.IOException;
+import java.nio.charset.StandardCharsets;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
@@ -14,9 +15,10 @@
 
 public class TagCounterMap {
 	private static final Logger LOG = Logger.getLogger(TagCounterMap.class);
+
 	protected static enum MAPPERCOUNTER {
-		RECORDS_IN,
-		EXCEPTIONS
+		RECORDS_IN, //
+		EXCEPTIONS //
 	}
 
 	protected static class TagCounterMapper extends Mapper<Text, ArchiveReader, Text, LongWritable> {
@@ -32,21 +34,21 @@ protected static class TagCounterMapper extends Mapper<Text, ArchiveReader, Text
 		public void map(Text key, ArchiveReader value, Context context) throws IOException {
 			// Compile the regular expression once as it will be used continuously
 			patternTag = Pattern.compile(HTML_TAG_PATTERN);
-			
+
 			for (ArchiveRecord r : value) {
 				try {
-					LOG.debug(r.getHeader().getUrl() + " -- " + r.available());
+					LOG.debug(r.getHeader().getUrl() + " -- " + r.available() + " -- " + r.getHeader().getMimetype());
 					// We're only interested in processing the responses, not requests or metadata
 					if (r.getHeader().getMimetype().equals("application/http; msgtype=response")) {
 						// Convenience function that reads the full message into a raw byte array
 						byte[] rawData = IOUtils.toByteArray(r, r.available());
-						String content = new String(rawData);
+						String content = new String(rawData, StandardCharsets.ISO_8859_1);
 						// The HTTP header gives us valuable information about what was received during the request
 						String headerText = content.substring(0, content.indexOf("\r\n\r\n"));
-						
+
 						// In our task, we're only interested in text/html, so we can be a little lax
 						// TODO: Proper HTTP header parsing + don't trust headers
-						if (headerText.contains("Content-Type: text/html")) {
+						if (headerText.toLowerCase().contains("content-type: text/html")) {
 							context.getCounter(MAPPERCOUNTER.RECORDS_IN).increment(1);
 							// Only extract the body of the HTTP response when necessary
 							// Due to the way strings work in Java, we don't use any more memory than before
@@ -60,8 +62,7 @@ public void map(Text key, ArchiveReader value, Context context) throws IOExcepti
 							}
 						}
 					}
-				}
-				catch (Exception ex) {
+				} catch (Exception ex) {
 					LOG.error("Caught Exception", ex);
 					context.getCounter(MAPPERCOUNTER.EXCEPTIONS).increment(1);
 				}
diff --git a/src/org/commoncrawl/examples/mapreduce/WARCTagCounter.java b/src/org/commoncrawl/examples/mapreduce/WARCTagCounter.java
index 26f40c1..ac30041 100644
--- a/src/org/commoncrawl/examples/mapreduce/WARCTagCounter.java
+++ b/src/org/commoncrawl/examples/mapreduce/WARCTagCounter.java
@@ -1,8 +1,11 @@
 package org.commoncrawl.examples.mapreduce;
 
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
@@ -23,9 +26,9 @@
  */
 public class WARCTagCounter extends Configured implements Tool {
 	private static final Logger LOG = Logger.getLogger(WARCTagCounter.class);
-	
+
 	/**
-	 * Main entry point that uses the {@link ToolRunner} class to run the Hadoop job. 
+	 * Main entry point that uses the {@link ToolRunner} class to run the Hadoop job.
 	 */
 	public static void main(String[] args) throws Exception {
 		int res = ToolRunner.run(new Configuration(), new WARCTagCounter(), args);
@@ -34,38 +37,53 @@ public static void main(String[] args) throws Exception {
 
 	/**
 	 * Builds and runs the Hadoop job.
-	 * @return	0 if the Hadoop job completes successfully and 1 otherwise.
+	 * 
+	 * @param args command line arguments
+	 * @return 0 if the Hadoop job completes successfully and -1 otherwise.
 	 */
 	@Override
-	public int run(String[] arg0) throws Exception {
+	public int run(String[] args) throws Exception {
+		// at least the output path and one input path are required
+		if (args.length < 2) {
+			System.err.println("Usage: " + this.getClass().getSimpleName() + " <outputpath> <inputpath>...");
+			return -1;
+		}
+
+		// the first argument is the output path ...
+		Path outputPath = new Path(args[0]);
+		// ... and all remaining arguments are input paths
+		List<Path> inputPaths = new ArrayList<>(args.length - 1);
+		for (int i = 1; i < args.length; i++) {
+			inputPaths.add(new Path(args[i]));
+		}
+		return run(outputPath, inputPaths.toArray(new Path[0]));
+	}
+
+	public int run(Path outputPath, Path[] inputPaths)
+			throws IOException, ClassNotFoundException, InterruptedException {
 		Configuration conf = getConf();
-		//
-		Job job = new Job(conf);
+
+		Job job = Job.getInstance(conf);
 		job.setJarByClass(WARCTagCounter.class);
 		job.setNumReduceTasks(1);
-		
-		String inputPath = "data/*.warc.gz";
-		//inputPath = "s3n://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2013-48/segments/1386163035819/wet/CC-MAIN-20131204131715-00000-ip-10-33-133-15.ec2.internal.warc.wet.gz";
-		//inputPath = "s3n://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2013-48/segments/1386163035819/wet/*.warc.wet.gz";
-		LOG.info("Input path: " + inputPath);
-		FileInputFormat.addInputPath(job, new Path(inputPath));
-		
-		String outputPath = "/tmp/cc/";
-		FileSystem fs = FileSystem.newInstance(conf);
-		if (fs.exists(new Path(outputPath))) {
-			fs.delete(new Path(outputPath), true);
+
+		for (int i = 0; i < inputPaths.length; i++) {
+			LOG.info("Input path: " + inputPaths[i]);
+			FileInputFormat.addInputPath(job, inputPaths[i]);
 		}
-		FileOutputFormat.setOutputPath(job, new Path(outputPath));
+
+		LOG.info("Output path: " + outputPath);
+		FileOutputFormat.setOutputPath(job, outputPath);
 
 		job.setInputFormatClass(WARCFileInputFormat.class);
 		job.setOutputFormatClass(TextOutputFormat.class);
 
 		job.setOutputKeyClass(Text.class);
-	    job.setOutputValueClass(LongWritable.class);
-	    
-	    job.setMapperClass(TagCounterMap.TagCounterMapper.class);
-	    job.setReducerClass(LongSumReducer.class);
+		job.setOutputValueClass(LongWritable.class);
+
+		job.setMapperClass(TagCounterMap.TagCounterMapper.class);
+		job.setReducerClass(LongSumReducer.class);
 
-	    return job.waitForCompletion(true) ? 0 : -1;
+		return job.waitForCompletion(true) ? 0 : -1;
 	}
 }
diff --git a/src/org/commoncrawl/examples/mapreduce/WATSampleOutLinks.java b/src/org/commoncrawl/examples/mapreduce/WATSampleOutLinks.java
new file mode 100644
index 0000000..c7eeb95
--- /dev/null
+++ b/src/org/commoncrawl/examples/mapreduce/WATSampleOutLinks.java
@@ -0,0 +1,490 @@
+package org.commoncrawl.examples.mapreduce;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.regex.Pattern;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.log4j.Logger;
+import org.archive.io.ArchiveReader;
+import org.archive.io.ArchiveRecord;
+import org.commoncrawl.warc.WARCFileInputFormat;
+import org.json.JSONArray;
+import org.json.JSONException;
+import org.json.JSONObject;
+
+/**
+ * Extract and randomly sample outlinks (links to pages, not image and media links) from WAT
+ * files.
+ */
+public class WATSampleOutLinks extends Configured implements Tool {
+
+	private static final Logger LOG = Logger.getLogger(WATSampleOutLinks.class);
+
+	protected static enum COUNTER {
+		RECORDS, // JSON records read from the input
+		RESPONSE_RECORDS, // records with WARC-Type "response"
+		RECORDS_NON_HTML, // response records without "HTML-Metadata"
+		RECORDS_NOFOLLOW_X_ROBOTS_SKIPPED, // records skipped: "nofollow" in HTTP header X-Robots-Tag
+		RECORDS_NOFOLLOW_META_SKIPPED, // records skipped: "nofollow" in a HTML robots meta element
+		EXCEPTIONS, // exceptions other than the two below while processing a record
+		EXCEPTIONS_JSON, // JSONException while processing a record
+		EXCEPTIONS_URL_MALFORMED, // MalformedURLException while processing a record
+		LINKS_PAGE_ACCEPTED, // links which passed the link type filters
+		LINKS_TOTAL, // all links seen, including og:url meta links
+		LINKS_MEDIA_SKIPPED, // image, media and form links, <link> elements with a rel not listed
+		LINKS_REL_NOFOLLOW_SKIPPED, // <a> links skipped because of rel "nofollow"
+		LINKS_UNSAFE_SKIPPED, // relative links in data-href / data-uri attributes
+		LINKS_PAGE_UNIQ, // unique links per page, summed over all pages
+		LINKS_PAGE_UNIQ_ACCEPTED, // unique links per page written by the mapper
+		LINKS_PAGE_UNIQ_SKIPPED_MAX_PER_PAGE, // unique links dropped by the per-page limit
+		LINKS_RANDOM_SKIP, // links not selected by the random sampling
+		LINKS_RANDOM_SAMPLED, // links selected by the random sampling
+		LINKS_MALFORMED_URL, // links which could not be resolved to a valid URL
+		LINKS_UNSAFE_TEXT_SKIPPED /** URL contains a control character (U+0000 - U+001F), e.g. tab or newline */
+	}
+
+	private static final Pattern dataUriPattern = Pattern.compile("@/data-(?:href|uri)$");
+	private static final Pattern globalLinkPattern = Pattern.compile("^(?:[a-z][a-z0-9]{1,5}:)?//");
+	private static Pattern nofollowPattern = Pattern.compile("\\bnofollow\\b", Pattern.CASE_INSENSITIVE);
+
+	protected static class OutLinkMapper extends Mapper<Text, ArchiveReader, Text, LongWritable> {
+		private Text outKey = new Text();
+		private LongWritable outVal = new LongWritable(1);
+		private LongWritable one = new LongWritable(1);
+		int maxOutlinksPerPage = 80;
+		boolean outlinksWeightedCount = false;
+		boolean respectNofollow = false;
+		boolean extractFeed = false;
+		String extractFeedMarker = "";
+		Pattern nofollowBotPattern = null;
+
+		/** Reads the {@code wat.outlinks.*} job properties into the fields of this mapper. */
+		@Override
+		public void setup(Context context) {
+			Configuration conf = context.getConfiguration();
+			maxOutlinksPerPage = conf.getInt("wat.outlinks.max.per.page", 80);
+			// weighted link counts: each page can distribute `wat.outlinks.max.per.page` points, links
+			// from pages with many links get a lower weight, the weight is calculated as
+			// `wat.outlinks.max.per.page / num_links_of_page`
+			outlinksWeightedCount = conf.getBoolean("wat.outlinks.weighted.count", false);
+			extractFeed = conf.getBoolean("wat.outlinks.extract.feed", false);
+			extractFeedMarker = conf.get("wat.outlinks.extract.feed.marker", "");
+			respectNofollow = conf.getBoolean("wat.outlinks.respect.nofollow", false);
+			if (respectNofollow) {
+				String nofollowBotPatternString = conf.get("wat.outlinks.respect.nofollow.bot.pattern", "");
+				if (!nofollowBotPatternString.isBlank()) {
+					try {
+						// group the user pattern: an alternation (a|b) must stay enclosed by both \s* parts
+						nofollowBotPattern = Pattern
+								.compile("\\s*(?:" + nofollowBotPatternString + ")\\s*", Pattern.CASE_INSENSITIVE);
+					} catch (IllegalArgumentException e) {
+						LOG.error("Failed to compile wat.outlinks.respect.nofollow.bot.pattern", e);
+					}
+				}
+			}
+		}
+
+		/**
+		 * Extracts the outlinks of every WAT response record (JSON) of the given reader and emits each
+		 * unique link, resolved against the page's base URL, at most {@code maxOutlinksPerPage} per page.
+		 * Exceptions are caught, logged and counted per record.
+		 */
+		@Override
+		public void map(Text key, ArchiveReader value, Context context) throws IOException {
+			record: for (ArchiveRecord r : value) {
+				// Skip any records that are not JSON
+				if (!r.getHeader().getMimetype().equals("application/json")) {
+					continue record;
+				}
+				try {
+					context.getCounter(COUNTER.RECORDS).increment(1);
+					// Convenience function that reads the full message into a raw byte array
+					byte[] rawData = IOUtils.toByteArray(r, r.available());
+					// JSON is UTF-8 encoded: do not depend on the platform default charset
+					String content = new String(rawData, java.nio.charset.StandardCharsets.UTF_8);
+					try {
+						JSONObject json = new JSONObject(content);
+						JSONObject warcHeader = json.getJSONObject("Envelope").getJSONObject("WARC-Header-Metadata");
+						String warcType = warcHeader.getString("WARC-Type");
+						if (!warcType.equals("response")) {
+							continue record;
+						}
+						context.getCounter(COUNTER.RESPONSE_RECORDS).increment(1);
+						String base = warcHeader.getString("WARC-Target-URI");
+						if (base.startsWith("<") && base.endsWith(">")) {
+							// some WARC files enclose the WARC-Target-URI in <...>: strip both brackets
+							base = base.substring(1, base.length() - 1);
+						}
+						URL baseUrl = new URL(base);
+						JSONObject responseMetaData = json.getJSONObject("Envelope")
+								.getJSONObject("Payload-Metadata")
+								.getJSONObject("HTTP-Response-Metadata");
+						if (respectNofollow) {
+							// check HTTP header "X-Robots-Tag", eg.
+							// X-Robots-Tag: noindex, nofollow
+							// Note: only the first Header value is preserved in WAT files
+							JSONObject httpHeaders = responseMetaData.getJSONObject("Headers");
+							JSONArray httpHeaderNames = httpHeaders.names();
+							for (int i = 0, l = httpHeaders.length(); i < l; i++) {
+								String headerName = httpHeaderNames.getString(i);
+								if (headerName.equalsIgnoreCase("x-robots-tag")) {
+									Object headerValue = httpHeaders.get(headerName);
+									if (headerValue instanceof String) {
+										if (nofollowPattern.matcher((String) headerValue).find()) {
+											context.getCounter(COUNTER.RECORDS_NOFOLLOW_X_ROBOTS_SKIPPED).increment(1);
+											continue record;
+										}
+									} else if (headerValue instanceof JSONArray) {
+										for (int j = 0, L = ((JSONArray) headerValue).length(); j < L; j++) {
+											if (nofollowPattern.matcher(((JSONArray) headerValue).getString(j))
+													.find()) {
+												context.getCounter(COUNTER.RECORDS_NOFOLLOW_X_ROBOTS_SKIPPED)
+														.increment(1);
+												continue record;
+											}
+										}
+									} else {
+										LOG.error("Unexpected JSON value type when processing X-Robots-Tag: "
+												+ headerValue.getClass().getName());
+									}
+									// Note: continue to iterate over all HTTP headers because there might be variants
+									// (lower/upper case) of the "X-Robots-Tag" header
+								}
+							}
+						}
+						if (!responseMetaData.has("HTML-Metadata")) {
+							context.getCounter(COUNTER.RECORDS_NON_HTML).increment(1);
+							continue record;
+						}
+						JSONObject htmlMetaData = responseMetaData.getJSONObject("HTML-Metadata");
+						Set<String> outLinks = new HashSet<>();
+						if (htmlMetaData.has("Head")) {
+							JSONObject head = htmlMetaData.getJSONObject("Head");
+							if (head.has("Base")) {
+								base = head.getString("Base");
+								try {
+									baseUrl = new URL(baseUrl, base);
+								} catch (MalformedURLException ex) {
+									LOG.error("Ignoring malformed base URL '" + base + "': " + ex.getMessage());
+								}
+							}
+							if (head.has("Metas")) {
+								JSONArray metas = head.getJSONArray("Metas");
+								for (int i = 0, l = metas.length(); i < l; i++) {
+									JSONObject meta = metas.getJSONObject(i);
+									if (meta.has("property") && meta.getString("property").equals("og:url")
+											&& meta.has("content")) {
+										try {
+											URL url = new URL(baseUrl, meta.getString("content"));
+											context.getCounter(COUNTER.LINKS_TOTAL).increment(1);
+											outLinks.add(url.toString());
+										} catch (MalformedURLException ex) {
+											context.getCounter(COUNTER.LINKS_MALFORMED_URL).increment(1);
+										}
+									}
+									if (respectNofollow && meta.has("name") && (meta.getString("name")
+											.equalsIgnoreCase("robots")
+											|| (nofollowBotPattern != null
+													&& nofollowBotPattern.matcher(meta.getString("name")).matches()))) {
+										// check HTML meta "robots"
+										if (meta.has("content")
+												&& nofollowPattern.matcher(meta.getString("content")).find()) {
+											context.getCounter(COUNTER.RECORDS_NOFOLLOW_META_SKIPPED).increment(1);
+											continue record;
+										}
+									}
+								}
+							}
+							if (head.has("Link")) {
+								// <link ...>
+								addOutLinks(context, outLinks, baseUrl, head.getJSONArray("Link"));
+							}
+						}
+						if (htmlMetaData.has("Links")) {
+							addOutLinks(context, outLinks, baseUrl, htmlMetaData.getJSONArray("Links"));
+						}
+						context.getCounter(COUNTER.LINKS_PAGE_UNIQ).increment(outLinks.size());
+						if (outlinksWeightedCount) {
+							if (outLinks.size() >= maxOutlinksPerPage) {
+								outVal = one;
+							} else {
+								outVal = new LongWritable(Math.round(1.0d * maxOutlinksPerPage / outLinks.size()));
+							}
+						}
+						int n = 0;
+						for (String url : outLinks) {
+							// check the limit before writing, so that never more than the maximum is emitted
+							if (n >= maxOutlinksPerPage) {
+								context.getCounter(COUNTER.LINKS_PAGE_UNIQ_SKIPPED_MAX_PER_PAGE)
+										.increment(outLinks.size() - n);
+								break;
+							}
+							outKey.set(url);
+							context.write(outKey, outVal);
+							n++;
+						}
+						context.getCounter(COUNTER.LINKS_PAGE_UNIQ_ACCEPTED).increment(n);
+					} catch (JSONException ex) {
+						context.getCounter(COUNTER.EXCEPTIONS_JSON).increment(1);
+						LOG.error("Caught JSONException while processing record for " + r.getHeader().getUrl(), ex);
+					} catch (MalformedURLException ex) {
+						LOG.error("Caught MalformedURLException while processing record for "
+								+ r.getHeader().getUrl(), ex);
+						context.getCounter(COUNTER.EXCEPTIONS_URL_MALFORMED).increment(1);
+					} catch (Exception ex) {
+						context.getCounter(COUNTER.EXCEPTIONS).increment(1);
+						LOG.error("Caught Exception while processing record for " + r.getHeader().getUrl(), ex);
+					}
+				} catch (Exception ex) {
+					LOG.error("Caught Exception while processing record for " + r.getHeader().getUrl(), ex);
+					context.getCounter(COUNTER.EXCEPTIONS).increment(1);
+				}
+			}
+		}
+
+		/**
+		 * Adds the page links of a WAT link array to the collection of outlinks. Image, media and form
+		 * links are ignored, the URLs of accepted links are resolved against the given base URL.
+		 */
+		private void addOutLinks(Context context, Collection<String> outLinks, URL baseUrl, JSONArray links)
+				throws JSONException {
+			final int numLinks = links.length();
+			context.getCounter(COUNTER.LINKS_TOTAL).increment(numLinks);
+			nextLink: for (int pos = 0; pos < numLinks; pos++) {
+				JSONObject item = links.getJSONObject(pos);
+				if (!item.has("url") || !item.has("path")) {
+					continue nextLink;
+				}
+				String marker = "";
+				String elemPath = item.getString("path");
+				String href = item.getString("url");
+				linkType: switch (elemPath) {
+				case "A@/href":
+					if (respectNofollow && item.has("rel")
+							&& nofollowPattern.matcher(item.getString("rel")).find()) {
+						context.getCounter(COUNTER.LINKS_REL_NOFOLLOW_SKIPPED).increment(1);
+						continue nextLink;
+					}
+					break linkType;
+				case "IMG@/src":
+				case "FORM@/action":
+				case "TD@/background":
+				case "TABLE@/background":
+				case "BODY@/background":
+				case "AUDIO@/src":
+				case "VIDEO@/src":
+				case "TR@/background":
+					// ignore images and media
+					context.getCounter(COUNTER.LINKS_MEDIA_SKIPPED).increment(1);
+					continue nextLink;
+				case "LINK@/href":
+					if (item.has("rel")) {
+						switch (item.getString("rel")) {
+						case "canonical":
+							break linkType;
+						case "alternate":
+							if (item.has("hreflang")) {
+								// sample translations
+								break linkType;
+							}
+							if (extractFeed && item.has("type")) {
+								String type = item.getString("type");
+								if ("application/atom+xml".equals(type) || "application/rss+xml".equals(type)) {
+									marker = extractFeedMarker;
+									break linkType;
+								}
+							}
+							// fall-through for other rel links
+						default:
+							// ignore rels not explicitly listed
+							context.getCounter(COUNTER.LINKS_MEDIA_SKIPPED).increment(1);
+							continue nextLink;
+						}
+					}
+					break linkType;
+				default:
+					// relative links in data-* attributes are unsafe: Javascript is required to make them global
+					if (dataUriPattern.matcher(elemPath).find() && !globalLinkPattern.matcher(href).find()) {
+						context.getCounter(COUNTER.LINKS_UNSAFE_SKIPPED).increment(1);
+						continue nextLink;
+					}
+				}
+				context.getCounter(COUNTER.LINKS_PAGE_ACCEPTED).increment(1);
+				try {
+					outLinks.add(marker + new URL(baseUrl, href).toString());
+				} catch (MalformedURLException ex) {
+					context.getCounter(COUNTER.LINKS_MALFORMED_URL).increment(1);
+				}
+			}
+		}
+	}
+
+	/** Sums up the counts per outlink URL, URLs containing control characters are dropped. */
+	protected static class OutLinkCombiner extends Reducer<Text, LongWritable, Text, LongWritable> {
+		private LongWritable outVal = new LongWritable(1);
+
+		/**
+		 * Checks the bytes of a text for control characters.
+		 * @return true if text is safe and does not contain any control characters (U+0000 - U+001F)
+		 *         including '\t', '\r', '\n'
+		 */
+		public static boolean isSafeText(Text text) {
+			// only the first getLength() bytes of the backing array are valid, cf. Text#getBytes()
+			final byte[] bytes = text.getBytes();
+			final int length = Math.min(text.getLength(), bytes.length);
+			for (int i = 0; i < length; i++) {
+				byte b = bytes[i];
+				if (0 <= b && b <= 0x1F) {
+					return false;
+				}
+			}
+			return true;
+		}
+
+		/** Writes the summed count of a safe key, an unsafe key is only counted and skipped. */
+		@Override
+		public void reduce(Text key, Iterable<LongWritable> values, Context context)
+				throws IOException, InterruptedException {
+			if (isSafeText(key)) {
+				long total = 0;
+				for (LongWritable count : values) {
+					total += count.get();
+				}
+				outVal.set(total);
+				context.write(key, outVal);
+			} else {
+				context.getCounter(COUNTER.LINKS_UNSAFE_TEXT_SKIPPED).increment(1);
+			}
+		}
+	}
+
+	/** Sums up the counts per outlink URL and randomly samples the URLs which are written. */
+	protected static class OutLinkReducer extends OutLinkCombiner {
+		private double sampleProbability = .5;
+		private LongWritable outVal = new LongWritable(1);
+
+		@Override
+		public void setup(Context context) {
+			double configured = context.getConfiguration().getDouble("wat.outlinks.sample.probability", .5);
+			LOG.info("Outlink sample probability = " + configured);
+			// keep the inverted probability for the comparison with a random number
+			// (0.0 <= random < 1.0): a link is chosen if the number is >= the inverted probability
+			sampleProbability = 1.0 - configured;
+		}
+
+		@Override
+		public void reduce(Text key, Iterable<LongWritable> values, Context context)
+				throws IOException, InterruptedException {
+			if (!isSafeText(key)) {
+				context.getCounter(COUNTER.LINKS_UNSAFE_TEXT_SKIPPED).increment(1);
+				return;
+			}
+			long total = 0;
+			for (LongWritable count : values) {
+				total += count.get();
+			}
+			// the random number is multiplied by the number of times the outlink URL has been observed
+			boolean sampled = sampleProbability <= 0.0 || (total * Math.random()) >= sampleProbability;
+			if (!sampled) {
+				context.getCounter(COUNTER.LINKS_RANDOM_SKIP).increment(1);
+				return;
+			}
+			outVal.set(total);
+			context.write(key, outVal);
+			context.getCounter(COUNTER.LINKS_RANDOM_SAMPLED).increment(1);
+		}
+	}
+
+	/**
+	 * Parses the command line (first the output path, then one or more input paths) and runs the job.
+	 * @return -1 if arguments are missing, otherwise the result of {@link #run(Path, Path[])}
+	 */
+	@Override
+	public int run(String[] args) throws Exception {
+		if (args.length < 2) {
+			System.err.println("Usage: WATSampleOutLinks [-Dproperty=value ...] <outputpath> <inputpath>...");
+			System.err.println("  -Dwat.outlinks.sample.probability=<prob>");
+			System.err.println("  \t\tprobability (0.0 < prob <= 1.0) to select an outlink");
+			System.err.println("  -Dwat.outlinks.max.per.page=n");
+			System.err.println("  \t\tmax. number of accepted outlinks per page");
+			System.err.println("  -Dwat.outlinks.respect.nofollow=<true|false>");
+			System.err.println("  \t\twhether to respect the nofollow link attributes and robots metadata");
+			System.err.println("  -Dwat.outlinks.respect.nofollow.bot.pattern=mybot");
+			System.err.println("  \t\tuser-specific bot name(s) when respecting nofollow robots HTML metadata,");
+			System.err.println("  \t\tdefined as regular expression pattern. The nofollow metadata instructions");
+			System.err.println("  \t\tfor the matched bot(s) are respected in addition to those addressing any bot.");
+			return -1;
+		}
+		Path outputPath = new Path(args[0]);
+		List<Path> inputPaths = new ArrayList<>(args.length - 1);
+		for (int i = 1; i < args.length; i++) {
+			inputPaths.add(new Path(args[i]));
+		}
+		return run(outputPath, inputPaths.toArray(new Path[0]));
+	}
+
+	/**
+	 * Configures and runs the Hadoop job.
+	 *
+	 * @param outputPath output path of the job
+	 * @param inputPaths input paths of the job
+	 * @return 0 if the job completed successfully, 1 otherwise
+	 */
+	public int run(Path outputPath, Path[] inputPaths)
+			throws IOException, ClassNotFoundException, InterruptedException {
+		Configuration conf = getConf();
+		Job job = Job.getInstance(conf);
+		job.setJarByClass(WATSampleOutLinks.class);
+		double sampleProbability = conf.getDouble("wat.outlinks.sample.probability", .5);
+
+		for (Path inputPath : inputPaths) {
+			LOG.info("Input path: " + inputPath);
+			FileInputFormat.addInputPath(job, inputPath);
+		}
+		FileOutputFormat.setOutputPath(job, outputPath);
+		LOG.info("Output path: " + outputPath);
+
+		job.setInputFormatClass(WARCFileInputFormat.class);
+		job.setOutputFormatClass(TextOutputFormat.class);
+		job.setOutputKeyClass(Text.class);
+		job.setOutputValueClass(LongWritable.class);
+
+		job.setMapperClass(OutLinkMapper.class);
+		job.setCombinerClass(OutLinkCombiner.class);
+		if (sampleProbability >= 1.0) {
+			LOG.info("Sample probablity >= 1.0: no random sampling, output all outlinks");
+			job.setReducerClass(OutLinkCombiner.class);
+		} else {
+			LOG.info("Sampling outlinks with probability " + sampleProbability);
+			job.setReducerClass(OutLinkReducer.class);
+		}
+
+		return job.waitForCompletion(true) ? 0 : 1;
+	}
+
+	/** Command line entry point: runs the tool via {@link ToolRunner} and exits with its result. */
+	public static void main(String[] args) throws Exception {
+		System.exit(ToolRunner.run(new Configuration(), new WATSampleOutLinks(), args));
+	}
+
+}
diff --git a/src/org/commoncrawl/examples/mapreduce/WATServerType.java b/src/org/commoncrawl/examples/mapreduce/WATServerType.java
index 8e6ea29..3e635fa 100644
--- a/src/org/commoncrawl/examples/mapreduce/WATServerType.java
+++ b/src/org/commoncrawl/examples/mapreduce/WATServerType.java
@@ -1,8 +1,11 @@
 package org.commoncrawl.examples.mapreduce;
 
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
@@ -23,9 +26,9 @@
  */
 public class WATServerType extends Configured implements Tool {
 	private static final Logger LOG = Logger.getLogger(WATServerType.class);
-	
+
 	/**
-	 * Main entry point that uses the {@link ToolRunner} class to run the Hadoop job. 
+	 * Main entry point that uses the {@link ToolRunner} class to run the Hadoop job.
 	 */
 	public static void main(String[] args) throws Exception {
 		int res = ToolRunner.run(new Configuration(), new WATServerType(), args);
@@ -34,42 +37,57 @@ public static void main(String[] args) throws Exception {
 
 	/**
 	 * Builds and runs the Hadoop job.
-	 * @return	0 if the Hadoop job completes successfully and 1 otherwise.
+	 * 
+	 * @param args command line arguments
+	 * @return 0 if the Hadoop job completes successfully, 1 if it fails, or -1 if fewer than two arguments are given.
 	 */
 	@Override
-	public int run(String[] arg0) throws Exception {
+	public int run(String[] args) throws Exception {
+		// At least one output path and one input path are required.
+		if (args.length < 2) {
+			System.err.println("Usage: " + this.getClass().getSimpleName() + " <outputpath> <inputpath>...");
+			return -1;
+		}
+
+		// The first argument names the output path ...
+		Path outputPath = new Path(args[0]);
+		// ... and every remaining argument is an input path.
+		List<Path> inputs = new ArrayList<>();
+		for (int i = 1; i < args.length; i++) {
+			inputs.add(new Path(args[i]));
+		}
+		return run(outputPath, inputs.toArray(new Path[inputs.size()]));
+	}
+
+	/**
+	 * Sets up the Hadoop job with a single reduce task and waits for its completion.
+	 *
+	 * @param outputPath path set as the job's output path
+	 * @param inputPaths paths added as the job's input paths
+	 * @return 0 if the Hadoop job completes successfully and 1 otherwise.
+	 */
+	public int run(Path outputPath, Path[] inputPaths)
+			throws IOException, ClassNotFoundException, InterruptedException {
 		Configuration conf = getConf();
-		//
-		Job job = new Job(conf);
+		Job job = Job.getInstance(conf);
 		job.setJarByClass(WATServerType.class);
 		job.setNumReduceTasks(1);
-		
-		String inputPath = "data/*.warc.wat.gz";
-		//inputPath = "s3n://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2013-48/segments/1386163035819/wet/CC-MAIN-20131204131715-00000-ip-10-33-133-15.ec2.internal.warc.wet.gz";
-		//inputPath = "s3n://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2013-48/segments/1386163035819/wet/*.warc.wet.gz";
-		LOG.info("Input path: " + inputPath);
-		FileInputFormat.addInputPath(job, new Path(inputPath));
-		
-		String outputPath = "/tmp/cc/";
-		FileSystem fs = FileSystem.newInstance(conf);
-		if (fs.exists(new Path(outputPath))) {
-			fs.delete(new Path(outputPath), true);
+
+		for (Path inputPath : inputPaths) {
+			LOG.info("Input path: " + inputPath);
+			FileInputFormat.addInputPath(job, inputPath);
 		}
-		FileOutputFormat.setOutputPath(job, new Path(outputPath));
-		
+
+		LOG.info("Output path: " + outputPath);
+		FileOutputFormat.setOutputPath(job, outputPath);
+
 		job.setInputFormatClass(WARCFileInputFormat.class);
 		job.setOutputFormatClass(TextOutputFormat.class);
-		
 		job.setOutputKeyClass(Text.class);
-	    job.setOutputValueClass(LongWritable.class);
-	    
-	    job.setMapperClass(ServerTypeMap.ServerMapper.class);
-	    job.setReducerClass(LongSumReducer.class);
-		
-	    if (job.waitForCompletion(true)) {
-	    	return 0;
-	    } else {
-	    	return 1;
-	    }
+		job.setOutputValueClass(LongWritable.class);
+		job.setMapperClass(ServerTypeMap.ServerMapper.class);
+		job.setReducerClass(LongSumReducer.class);
+
+		return job.waitForCompletion(true) ? 0 : 1;
 	}
 }
diff --git a/src/org/commoncrawl/examples/mapreduce/WETWordCount.java b/src/org/commoncrawl/examples/mapreduce/WETWordCount.java
index 3094fa8..b9e6bad 100644
--- a/src/org/commoncrawl/examples/mapreduce/WETWordCount.java
+++ b/src/org/commoncrawl/examples/mapreduce/WETWordCount.java
@@ -1,8 +1,11 @@
 package org.commoncrawl.examples.mapreduce;
 
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
@@ -23,9 +26,9 @@
  */
 public class WETWordCount extends Configured implements Tool {
 	private static final Logger LOG = Logger.getLogger(WETWordCount.class);
-	
+
 	/**
-	 * Main entry point that uses the {@link ToolRunner} class to run the Hadoop job. 
+	 * Main entry point that uses the {@link ToolRunner} class to run the Hadoop job.
 	 */
 	public static void main(String[] args) throws Exception {
 		int res = ToolRunner.run(new Configuration(), new WETWordCount(), args);
@@ -34,43 +37,58 @@ public static void main(String[] args) throws Exception {
 
 	/**
 	 * Builds and runs the Hadoop job.
-	 * @return	0 if the Hadoop job completes successfully and 1 otherwise.
+	 * 
+	 * @param args command line arguments
+	 * @return 0 if the Hadoop job completes successfully, 1 if it fails, or -1 if fewer than two arguments are given.
 	 */
 	@Override
-	public int run(String[] arg0) throws Exception {
+	public int run(String[] args) throws Exception {
+		// At least one output path and one input path are required.
+		if (args.length < 2) {
+			System.err.println("Usage: " + this.getClass().getSimpleName() + " <outputpath> <inputpath>...");
+			return -1;
+		}
+
+		// The first argument names the output path ...
+		Path outputPath = new Path(args[0]);
+		// ... and every remaining argument is an input path.
+		List<Path> inputs = new ArrayList<>();
+		for (int i = 1; i < args.length; i++) {
+			inputs.add(new Path(args[i]));
+		}
+		return run(outputPath, inputs.toArray(new Path[inputs.size()]));
+	}
+
+	/**
+	 * Sets up the Hadoop job with a single reduce task and waits for its completion.
+	 *
+	 * @param outputPath path set as the job's output path
+	 * @param inputPaths paths added as the job's input paths
+	 * @return 0 if the Hadoop job completes successfully and 1 otherwise.
+	 */
+	public int run(Path outputPath, Path[] inputPaths)
+			throws IOException, ClassNotFoundException, InterruptedException {
 		Configuration conf = getConf();
-		//
-		Job job = new Job(conf);
+		Job job = Job.getInstance(conf);
 		job.setJarByClass(WETWordCount.class);
 		job.setNumReduceTasks(1);
-		
-		String inputPath = "data/*.warc.wet.gz";
-		//inputPath = "s3n://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2013-48/segments/1386163035819/wet/CC-MAIN-20131204131715-00000-ip-10-33-133-15.ec2.internal.warc.wet.gz";
-		//inputPath = "s3n://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2013-48/segments/1386163035819/wet/*.warc.wet.gz";
-		LOG.info("Input path: " + inputPath);
-		FileInputFormat.addInputPath(job, new Path(inputPath));
-		
-		String outputPath = "/tmp/cc/";
-		FileSystem fs = FileSystem.newInstance(conf);
-		if (fs.exists(new Path(outputPath))) {
-			fs.delete(new Path(outputPath), true);
+
+		for (Path inputPath : inputPaths) {
+			LOG.info("Input path: " + inputPath);
+			FileInputFormat.addInputPath(job, inputPath);
 		}
-		FileOutputFormat.setOutputPath(job, new Path(outputPath));
-		
+
+		LOG.info("Output path: " + outputPath);
+		FileOutputFormat.setOutputPath(job, outputPath);
+
 		job.setInputFormatClass(WARCFileInputFormat.class);
 		job.setOutputFormatClass(TextOutputFormat.class);
-		
 		job.setOutputKeyClass(Text.class);
-	    job.setOutputValueClass(LongWritable.class);
-	    
-	    job.setMapperClass(WordCounterMap.WordCountMapper.class);
-	    // The reducer is quite useful in the word frequency task 
-	    job.setReducerClass(LongSumReducer.class);
-		
-	    if (job.waitForCompletion(true)) {
-	    	return 0;
-	    } else {
-	    	return 1;
-	    }
+		job.setOutputValueClass(LongWritable.class);
+		job.setMapperClass(WordCounterMap.WordCountMapper.class);
+		// The reducer is quite useful in the word frequency task
+		job.setReducerClass(LongSumReducer.class);
+
+		return job.waitForCompletion(true) ? 0 : 1;
 	}
 }
diff --git a/src/org/commoncrawl/examples/mapreduce/WordCounterMap.java b/src/org/commoncrawl/examples/mapreduce/WordCounterMap.java
index 3f0211a..d8d549e 100644
--- a/src/org/commoncrawl/examples/mapreduce/WordCounterMap.java
+++ b/src/org/commoncrawl/examples/mapreduce/WordCounterMap.java
@@ -13,11 +13,12 @@
 
 public class WordCounterMap {
 	private static final Logger LOG = Logger.getLogger(WordCounterMap.class);
+
 	protected static enum MAPPERCOUNTER {
-		RECORDS_IN,
-		EMPTY_PAGE_TEXT,
-		EXCEPTIONS,
-		NON_PLAIN_TEXT
+		RECORDS_IN, //
+		EMPTY_PAGE_TEXT, //
+		EXCEPTIONS, // incremented when map() catches an exception
+		NON_PLAIN_TEXT // incremented in an else branch of map(), presumably for non-plain-text records - verify
 	}
 
 	protected static class WordCountMapper extends Mapper<Text, ArchiveReader, Text, LongWritable> {
@@ -48,8 +49,7 @@ public void map(Text key, ArchiveReader value, Context context) throws IOExcepti
 					} else {
 						context.getCounter(MAPPERCOUNTER.NON_PLAIN_TEXT).increment(1);
 					}
-				}
-				catch (Exception ex) {
+				} catch (Exception ex) {
 					LOG.error("Caught Exception", ex);
 					context.getCounter(MAPPERCOUNTER.EXCEPTIONS).increment(1);
 				}
diff --git a/src/org/commoncrawl/warc/WARCFileInputFormat.java b/src/org/commoncrawl/warc/WARCFileInputFormat.java
index 89d2433..d752774 100644
--- a/src/org/commoncrawl/warc/WARCFileInputFormat.java
+++ b/src/org/commoncrawl/warc/WARCFileInputFormat.java
@@ -12,8 +12,8 @@
 import org.archive.io.ArchiveReader;
 
 /**
- * Minimal implementation of FileInputFormat for WARC files.
- * Hadoop is told that splitting these compressed files is not possible.
+ * Minimal implementation of FileInputFormat for WARC files. Hadoop is told that splitting these
+ * compressed files is not possible.
  *
  * @author Stephen Merity (Smerity)
  */
@@ -24,7 +24,7 @@ public RecordReader<Text, ArchiveReader> createRecordReader(InputSplit split, Ta
 			throws IOException, InterruptedException {
 		return new WARCFileRecordReader();
 	}
-	
+
 	@Override
 	protected boolean isSplitable(JobContext context, Path filename) {
 		// As these are compressed files, they cannot be (sanely) split
diff --git a/src/org/commoncrawl/warc/WARCFileRecordReader.java b/src/org/commoncrawl/warc/WARCFileRecordReader.java
index b1e8e1e..a31d6ad 100644
--- a/src/org/commoncrawl/warc/WARCFileRecordReader.java
+++ b/src/org/commoncrawl/warc/WARCFileRecordReader.java
@@ -15,9 +15,9 @@
 import org.archive.io.warc.WARCReaderFactory;
 
 /**
- * The WARC File Record Reader processes a single compressed input.
- * The Record Reader returns a single WARC ArchiveReader that can contain
- * numerous individual documents, each document handled in a single mapper.
+ * The WARC File Record Reader processes a single compressed input. The Record Reader returns a
+ * single WARC ArchiveReader that can contain numerous individual documents, each document
+ * handled in a single mapper.
  *
  * @author Stephen Merity (Smerity)
  */
@@ -28,8 +28,7 @@ public class WARCFileRecordReader extends RecordReader<Text, ArchiveReader> {
 	private boolean hasBeenRead = false;
 
 	@Override
-	public void initialize(InputSplit inputSplit, TaskAttemptContext context)
-			throws IOException, InterruptedException {
+	public void initialize(InputSplit inputSplit, TaskAttemptContext context) throws IOException, InterruptedException {
 		FileSplit split = (FileSplit) inputSplit;
 		Configuration conf = context.getConfiguration();
 		Path path = split.getPath();