diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml
index 2421cef3..bb63cd56 100644
--- a/.github/workflows/maven.yml
+++ b/.github/workflows/maven.yml
@@ -34,4 +34,4 @@ jobs:
restore-keys: |
${{ runner.os }}-maven-
- name: Build with Maven
- run: mvn -B package --file pom.xml
+ run: mvn -B verify --file pom.xml
diff --git a/pom.xml b/pom.xml
index 73ba9ba2..3dca19e1 100644
--- a/pom.xml
+++ b/pom.xml
@@ -48,6 +48,7 @@
UTF-8
${maven.build.timestamp}
yyyyMMddhhmmss
+ 8
@@ -164,8 +165,8 @@
maven-compiler-plugin
3.14.1
- 8
- 8
+ ${java.version}
+ ${java.version}
@@ -173,6 +174,33 @@
maven-surefire-plugin
3.2.5
+
+ de.thetaphi
+ forbiddenapis
+ 3.10
+
+ ${java.version}
+ true
+
+ false
+
+ jdk-unsafe
+ jdk-deprecated
+ jdk-non-portable
+
+
+ src/test/resources/forbidden-apis-signatures.txt
+
+
+
+
+
+ check
+ testCheck
+
+
+
+
diff --git a/src/main/java/org/archive/extract/DumpingExtractorOutput.java b/src/main/java/org/archive/extract/DumpingExtractorOutput.java
index 69591931..1ccbf771 100644
--- a/src/main/java/org/archive/extract/DumpingExtractorOutput.java
+++ b/src/main/java/org/archive/extract/DumpingExtractorOutput.java
@@ -3,6 +3,7 @@
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintStream;
+import java.io.UnsupportedEncodingException;
import java.util.logging.Logger;
import org.archive.resource.Resource;
@@ -12,13 +13,21 @@
import com.google.common.io.ByteStreams;
import com.google.common.io.CountingOutputStream;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
public class DumpingExtractorOutput implements ExtractorOutput {
private static final Logger LOG =
Logger.getLogger(DumpingExtractorOutput.class.getName());
private PrintStream out;
public DumpingExtractorOutput(OutputStream out) {
- this.out = new PrintStream(out);
+ try {
+ this.out = new PrintStream(out, false, UTF_8.name());
+ } catch (UnsupportedEncodingException e) {
+ // UTF-8 is a charset every Java platform must support, so this is unreachable;
+ // fail loudly rather than silently leaving this.out null.
+ throw new AssertionError("UTF-8 charset unavailable", e);
+ }
}
public void output(Resource resource) throws IOException {
diff --git a/src/main/java/org/archive/extract/ExtractingResourceFactoryMapper.java b/src/main/java/org/archive/extract/ExtractingResourceFactoryMapper.java
index 0afe16fb..567b1cd8 100644
--- a/src/main/java/org/archive/extract/ExtractingResourceFactoryMapper.java
+++ b/src/main/java/org/archive/extract/ExtractingResourceFactoryMapper.java
@@ -1,6 +1,7 @@
package org.archive.extract;
import java.util.Iterator;
+import java.util.Locale;
import java.util.logging.Logger;
import org.archive.format.arc.ARCConstants;
@@ -68,14 +69,14 @@ private boolean childFieldStartsWith(MetaData m, String child,
String key, String search) {
String val = getChildField(m,child,key);
return val == null ? false :
- val.toLowerCase().startsWith(search.toLowerCase());
+ val.toLowerCase(Locale.ROOT).startsWith(search.toLowerCase(Locale.ROOT));
}
private boolean childFieldContains(MetaData m, String child,
String key, String search) {
String val = getChildField(m,child,key);
return val == null ? false :
- val.toLowerCase().contains(search.toLowerCase());
+ val.toLowerCase(Locale.ROOT).contains(search.toLowerCase(Locale.ROOT));
}
private boolean childFieldEquals(MetaData m, String child,
@@ -88,7 +89,7 @@ private boolean childFieldEquals(MetaData m, String child,
private String caseInsensitiveKeyScan(MetaData m, String child, String k) {
try {
if(m.has(child)) {
- String kLC = k.toLowerCase();
+ String kLC = k.toLowerCase(Locale.ROOT);
JSONObject childJSObj = m.getJSONObject(child);
@SuppressWarnings("rawtypes")
Iterator i = childJSObj.keys();
@@ -96,7 +97,7 @@ private String caseInsensitiveKeyScan(MetaData m, String child, String k) {
Object kObj = i.next();
if(kObj instanceof String) {
String kString = (String) kObj;
- if(kString.toLowerCase().equals(kLC)) {
+ if(kString.toLowerCase(Locale.ROOT).equals(kLC)) {
return childJSObj.getString(kString);
}
}
@@ -128,7 +129,7 @@ private boolean isHTTPARCResource(MetaData envelope) {
private boolean isHTMLHttpResource(MetaData m) {
String type = caseInsensitiveKeyScan(m,HTTP_HEADERS_LIST,
"Content-Type");
- return type == null ? false : type.toLowerCase().contains("html");
+ return type == null ? false : type.toLowerCase(Locale.ROOT).contains("html");
}
private boolean isWARCType(MetaData envelope, WARCRecordType type) {
diff --git a/src/main/java/org/archive/extract/ExtractingResourceProducer.java b/src/main/java/org/archive/extract/ExtractingResourceProducer.java
index de671bee..07cdb88a 100644
--- a/src/main/java/org/archive/extract/ExtractingResourceProducer.java
+++ b/src/main/java/org/archive/extract/ExtractingResourceProducer.java
@@ -1,6 +1,7 @@
package org.archive.extract;
import java.io.IOException;
+import java.util.Locale;
import java.util.logging.Level;
import java.util.logging.Logger;
@@ -33,7 +34,7 @@ public Resource getNext() throws ResourceParseException, IOException {
return current;
}
if(LOG.isLoggable(Level.INFO)) {
- LOG.info(String.format("Extracting (%s) with (%s)\n",
+ LOG.info(String.format(Locale.ROOT, "Extracting (%s) with (%s)\n",
current.getClass().toString(),
f.getClass().toString()));
}
diff --git a/src/main/java/org/archive/extract/JSONViewExtractorOutput.java b/src/main/java/org/archive/extract/JSONViewExtractorOutput.java
index fb6dc847..6cb7c445 100644
--- a/src/main/java/org/archive/extract/JSONViewExtractorOutput.java
+++ b/src/main/java/org/archive/extract/JSONViewExtractorOutput.java
@@ -3,6 +3,7 @@
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintStream;
+import java.io.UnsupportedEncodingException;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
@@ -10,12 +11,20 @@
import org.archive.resource.Resource;
import org.archive.util.StreamCopy;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
public class JSONViewExtractorOutput implements ExtractorOutput {
private PrintStream out;
private JSONView view;
public JSONViewExtractorOutput(OutputStream out, String filterPath) {
view = new JSONView(filterPath.split(","));
- this.out = new PrintStream(out);
+ try {
+ this.out = new PrintStream(out, false, UTF_8.name());
+ } catch (UnsupportedEncodingException e) {
+ // UTF-8 is a charset every Java platform must support, so this is unreachable;
+ // fail loudly rather than silently leaving this.out null.
+ throw new AssertionError("UTF-8 charset unavailable", e);
+ }
}
public void output(Resource resource) throws IOException {
StreamCopy.readToEOF(resource.getInputStream());
diff --git a/src/main/java/org/archive/extract/RealCDXExtractorOutput.java b/src/main/java/org/archive/extract/RealCDXExtractorOutput.java
index e6f6e82f..ff0b9e83 100644
--- a/src/main/java/org/archive/extract/RealCDXExtractorOutput.java
+++ b/src/main/java/org/archive/extract/RealCDXExtractorOutput.java
@@ -8,6 +8,7 @@
import java.net.URISyntaxException;
import java.net.URL;
import java.util.List;
+import java.util.Locale;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -131,7 +132,7 @@ public void output(Resource resource) throws IOException {
} else {
meta = "-";
}
- if(mime.toLowerCase().contains("html")) {
+ if(mime.toLowerCase(Locale.ROOT).contains("html")) {
if(redir.equals("-")) {
// maybe an obvious meta-refresh?
redir = extractHTMLMetaRefresh(origUrl,m);
@@ -202,7 +203,7 @@ public void output(Resource resource) throws IOException {
} else {
meta = "-";
}
- if(mime.toLowerCase().contains("html")) {
+ if(mime.toLowerCase(Locale.ROOT).contains("html")) {
if(redir.equals("-")) {
// maybe an obvious meta-refresh?
redir = extractHTMLMetaRefresh(origUrl,m);
@@ -222,7 +223,8 @@ public void output(Resource resource) throws IOException {
canUrl = keyMaker.makeKey(origUrl);
// URL DATE OURL MIME HTTP-CODE SHA1 META REDIR OFFSET LENGTH FILE
if(dumpJSON) {
- out.format("%s %s %s %s %s %s %s %s %s %s %s %s\n",
+ out.format(Locale.ROOT,
+ "%s %s %s %s %s %s %s %s %s %s %s %s\n",
canUrl,
date,
origUrl,
@@ -236,7 +238,8 @@ public void output(Resource resource) throws IOException {
filename,
m.toString(1));
} else {
- out.format("%s %s %s %s %s %s %s %s %s %s %s\n",
+ out.format(Locale.ROOT,
+ "%s %s %s %s %s %s %s %s %s %s %s\n",
canUrl,
date,
origUrl,
@@ -269,7 +272,7 @@ private String extractHTMLRobots(MetaData m) {
if(meta != null) {
String name = scanHeadersLC(meta, "name", null);
if(name != null) {
- if(name.toLowerCase().equals("robots")) {
+ if(name.toLowerCase(Locale.ROOT).equals("robots")) {
// alright - some robot instructions:
String content = scanHeadersLC(meta, "content", null);
if(content != null) {
@@ -291,7 +294,7 @@ private String extractHTMLMetaRefresh(String origUrl, MetaData m) {
if(meta != null) {
String name = scanHeadersLC(meta, "http-equiv", null);
if(name != null) {
- if(name.toLowerCase().equals("refresh")) {
+ if(name.toLowerCase(Locale.ROOT).equals("refresh")) {
// alright - some robot instructions:
String content = scanHeadersLC(meta, "content", null);
if(content != null) {
@@ -330,7 +333,7 @@ private String scanHeadersLC(JSONObject o, String match, String defaultVal) {
if(o.length() == 0) {
return defaultVal;
}
- String lc = match.toLowerCase().trim();
+ String lc = match.toLowerCase(Locale.ROOT).trim();
// try {
// System.err.println("REC:" + o.toString(1));
// } catch (JSONException e1) {
@@ -338,7 +341,7 @@ private String scanHeadersLC(JSONObject o, String match, String defaultVal) {
// e1.printStackTrace();
// }
for(String key : JSONObject.getNames(o)) {
- if(lc.equals(key.toLowerCase().trim())) {
+ if(lc.equals(key.toLowerCase(Locale.ROOT).trim())) {
try {
return o.getString(key).trim();
} catch (JSONException e) {
@@ -472,7 +475,7 @@ private String parseRobotInstructions(String input) {
if(input == null) {
return "-";
}
- String up = input.replaceAll("-", "").toUpperCase();
+ String up = input.replaceAll("-", "").toUpperCase(Locale.ROOT);
StringBuilder sb = new StringBuilder(3);
if(up.contains(NO_FOLLOW_MATCH)) {
sb.append("F");
diff --git a/src/main/java/org/archive/extract/ResourceExtractor.java b/src/main/java/org/archive/extract/ResourceExtractor.java
index 2812aa5b..d9b9f396 100644
--- a/src/main/java/org/archive/extract/ResourceExtractor.java
+++ b/src/main/java/org/archive/extract/ResourceExtractor.java
@@ -7,7 +7,8 @@
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.net.URISyntaxException;
-import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+import java.util.Locale;
import java.util.logging.Level;
import java.util.logging.Logger;
@@ -26,7 +27,6 @@ public class ResourceExtractor implements ResourceConstants, Tool {
private final static Logger LOG =
Logger.getLogger(ResourceExtractor.class.getName());
- Charset UTF8 = Charset.forName("utf-8");
public final static String TOOL_NAME = "extractor";
public static final String TOOL_DESCRIPTION =
"A tool for extracting metadata from WARC, ARC, and WAT files";
@@ -65,7 +65,7 @@ public static void main(String[] args) throws Exception {
private PrintWriter makePrintWriter(OutputStream os)
{
- return new PrintWriter(new OutputStreamWriter(os, Charset.forName("UTF-8")));
+ return new PrintWriter(new OutputStreamWriter(os, StandardCharsets.UTF_8));
}
public int run(String[] args)
@@ -138,18 +138,18 @@ public int run(String[] args)
out.output(r);
} catch(GZIPFormatException e) {
- LOG.severe(String.format("%s: %s",exProducer.getContext(),e.getMessage()));
+ LOG.severe(String.format(Locale.ROOT, "%s: %s",exProducer.getContext(),e.getMessage()));
//Log is not coming out for some damn reason....needs to be studied
- System.err.format("%s: %s",exProducer.getContext(),e.getMessage());
+ System.err.format(Locale.ROOT, "%s: %s",exProducer.getContext(),e.getMessage());
if(ProducerUtils.STRICT_GZ) {
throw e;
}
e.printStackTrace();
} catch(ResourceParseException e) {
- LOG.severe(String.format("%s: %s",exProducer.getContext(),e.getMessage()));
+ LOG.severe(String.format(Locale.ROOT, "%s: %s",exProducer.getContext(),e.getMessage()));
//Log is not coming out for some damn reason....needs to be studied
- System.err.format("%s: %s",exProducer.getContext(),e.getMessage());
+ System.err.format(Locale.ROOT, "%s: %s",exProducer.getContext(),e.getMessage());
if(ProducerUtils.STRICT_GZ) {
throw e;
@@ -157,9 +157,9 @@ public int run(String[] args)
e.printStackTrace();
} catch(RecoverableRecordFormatException e) {
// this should not get here - ResourceFactory et al should wrap as ResourceParseExceptions...
- LOG.severe(String.format("RECOVERABLE - %s: %s",exProducer.getContext(),e.getMessage()));
+ LOG.severe(String.format(Locale.ROOT, "RECOVERABLE - %s: %s",exProducer.getContext(),e.getMessage()));
//Log is not coming out for some damn reason....needs to be studied
- System.err.format("%s: %s",exProducer.getContext(),e.getMessage());
+ System.err.format(Locale.ROOT, "%s: %s",exProducer.getContext(),e.getMessage());
e.printStackTrace();
diff --git a/src/main/java/org/archive/extract/WARCMetadataRecordExtractorOutput.java b/src/main/java/org/archive/extract/WARCMetadataRecordExtractorOutput.java
index 68f9d1c8..b1050a14 100644
--- a/src/main/java/org/archive/extract/WARCMetadataRecordExtractorOutput.java
+++ b/src/main/java/org/archive/extract/WARCMetadataRecordExtractorOutput.java
@@ -3,23 +3,16 @@
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintWriter;
-import java.net.MalformedURLException;
-import java.net.URISyntaxException;
-import java.net.URL;
import java.util.List;
+import java.util.Locale;
import java.util.logging.Logger;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
import org.archive.format.gzip.GZIPFormatException;
-import org.archive.format.json.JSONUtils;
import org.archive.format.json.SimpleJSONPathSpec;
import org.archive.resource.MetaData;
import org.archive.resource.Resource;
-import org.archive.util.IAUtils;
import org.archive.util.StreamCopy;
import org.json.JSONArray;
-import org.json.JSONException;
import org.json.JSONObject;
import com.google.common.io.ByteStreams;
@@ -87,7 +80,7 @@ public void output(Resource resource) throws IOException {
String[] linkParts = outLinkValue.split(" ");
if(linkParts.length > 2)
//'outlinks': 'origUrl date origOutlinkUrl linktype linktext'
- out.format("%s\t%s\t%s\t%s\t\n",origUrl,date,linkParts[0],linkParts[2]);
+ out.format(Locale.ROOT,"%s\t%s\t%s\t%s\t\n",origUrl,date,linkParts[0],linkParts[2]);
}
} else if(outputType.equals("hopinfo")) {
String key = obj.get("Name").toString();
@@ -103,7 +96,7 @@ public void output(Resource resource) throws IOException {
}
if(outputType.equals("hopinfo")) {
//'hopinfo': 'origCrawledUrl date origViaUrl hopPathFromVia sourceTag'
- out.format("%s\t%s\t%s\t%s\t%s\n",origUrl,date,viaUrl,viaPath,sourceTag);
+ out.format(Locale.ROOT,"%s\t%s\t%s\t%s\t%s\n",origUrl,date,viaUrl,viaPath,sourceTag);
}
}
}
diff --git a/src/main/java/org/archive/extract/WATExtractorOutput.java b/src/main/java/org/archive/extract/WATExtractorOutput.java
index 4b5f72ed..621656b7 100644
--- a/src/main/java/org/archive/extract/WATExtractorOutput.java
+++ b/src/main/java/org/archive/extract/WATExtractorOutput.java
@@ -1,15 +1,14 @@
package org.archive.extract;
-import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
-import java.nio.charset.Charset;
import java.text.ParseException;
import java.net.UnknownHostException;
import java.util.Date;
+import java.util.Locale;
import org.archive.format.gzip.GZIPMemberWriter;
import org.archive.format.gzip.GZIPMemberWriterCommittedOutputStream;
@@ -30,13 +29,14 @@
import java.util.logging.Logger;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
public class WATExtractorOutput implements ExtractorOutput {
WARCRecordWriter recW;
private boolean wroteFirst;
private GZIPMemberWriter gzW;
private static int DEFAULT_BUFFER_RAM = 1024 * 1024;
private int bufferRAM = DEFAULT_BUFFER_RAM;
- private final static Charset UTF8 = Charset.forName("UTF-8");
private String outputFile;
private static final Logger LOG = Logger.getLogger(WATExtractorOutput.class.getName());
@@ -143,7 +143,7 @@ private void writeARC(OutputStream recOut, MetaData md) throws IOException {
String capDateString = extractOrIO(md, "Envelope.ARC-Header-Metadata.Date");
String filename = extractOrIO(md, "Container.Filename");
String offset = extractOrIO(md, "Container.Offset");
- String recId = String.format("",filename,offset);
+ String recId = String.format(Locale.ROOT, "",filename,offset);
writeWARCMDRecord(recOut,md,targetURI,capDateString,recId);
}
@@ -156,7 +156,7 @@ private void writeWARC(OutputStream recOut, MetaData md) throws IOException {
targetURI = extractOrIO(md, "Envelope.WARC-Header-Metadata.WARC-Target-URI");
}
// handle date of generation in WARC format
- DateFormat dateFormat = new SimpleDateFormat("yyyyMMddHHmmss");
+ DateFormat dateFormat = new SimpleDateFormat("yyyyMMddHHmmss", Locale.ROOT);
String capDateString = dateFormat.format(new Date());
String recId = extractOrIO(md, "Envelope.WARC-Header-Metadata.WARC-Record-ID");
writeWARCMDRecord(recOut,md,targetURI,capDateString,recId);
@@ -168,7 +168,7 @@ private void writeWARCMDRecord(OutputStream recOut, MetaData md,
ByteArrayOutputStream bos = new ByteArrayOutputStream();
- OutputStreamWriter osw = new OutputStreamWriter(bos, UTF8);
+ OutputStreamWriter osw = new OutputStreamWriter(bos, UTF_8);
try {
md.write(osw);
} catch (JSONException e1) {
@@ -176,7 +176,7 @@ private void writeWARCMDRecord(OutputStream recOut, MetaData md,
throw new IOException(e1);
}
osw.flush();
-// ByteArrayInputStream bais = new ByteArrayInputStream(md.toString().getBytes("UTF-8"));
+// ByteArrayInputStream bais = new ByteArrayInputStream(md.toString().getBytes(UTF_8));
Date capDate;
try {
capDate = DateUtils.getSecondsSinceEpoch(capDateString);
diff --git a/src/main/java/org/archive/format/arc/ARCConstants.java b/src/main/java/org/archive/format/arc/ARCConstants.java
index 5987b49f..39dbf7ed 100755
--- a/src/main/java/org/archive/format/arc/ARCConstants.java
+++ b/src/main/java/org/archive/format/arc/ARCConstants.java
@@ -1,6 +1,7 @@
package org.archive.format.arc;
import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.List;
import java.util.zip.Deflater;
@@ -16,7 +17,7 @@
*/
public interface ARCConstants extends ArchiveFileConstants {
public final static int MAX_META_LENGTH = 1024 * 32;
- public final static Charset ARC_META_CHARSET = Charset.forName("utf-8");
+ public final static Charset ARC_META_CHARSET = StandardCharsets.UTF_8;
public final static int NEW_LINE_ORD = 10;
public static final int CARRIAGE_RETURN_ORD = 13;
public final static String DELIMITER = " ";
diff --git a/src/main/java/org/archive/format/arc/FiledescRecordParser.java b/src/main/java/org/archive/format/arc/FiledescRecordParser.java
index c2d7bb65..6a34eb5d 100644
--- a/src/main/java/org/archive/format/arc/FiledescRecordParser.java
+++ b/src/main/java/org/archive/format/arc/FiledescRecordParser.java
@@ -5,6 +5,7 @@
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
+import java.nio.charset.StandardCharsets;
public class FiledescRecordParser {
public boolean strict = false;
@@ -12,7 +13,7 @@ public FiledescRecord parse(InputStream is) throws IOException {
FiledescRecord rec = new FiledescRecord();
try {
// TODO: count input bytes read...
- BufferedReader br = new BufferedReader(new InputStreamReader(is,"UTF-8"));
+ BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8));
String line = br.readLine();
parseLine1(rec,line);
line = br.readLine();
diff --git a/src/main/java/org/archive/format/cdx/CDXFile.java b/src/main/java/org/archive/format/cdx/CDXFile.java
index 7dca0464..612f7454 100644
--- a/src/main/java/org/archive/format/cdx/CDXFile.java
+++ b/src/main/java/org/archive/format/cdx/CDXFile.java
@@ -18,6 +18,8 @@
import org.archive.util.iterator.CloseableIterator;
import org.archive.util.zip.OpenJDK7GZIPInputStream;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
public class CDXFile extends SortedTextFile implements CDXInputSource {
public CDXFile(String uri) throws IOException {
@@ -94,7 +96,7 @@ public static BufferedReader createStreamingLineReader(String uri, boolean gzipp
input = new OpenJDK7GZIPInputStream(swis);
}
- BufferedReader reader = new BufferedReader(new InputStreamReader(input));
+ BufferedReader reader = new BufferedReader(new InputStreamReader(input, UTF_8));
return reader;
}
diff --git a/src/main/java/org/archive/format/dns/DNSResponseParser.java b/src/main/java/org/archive/format/dns/DNSResponseParser.java
index b5f81633..3e868ccf 100644
--- a/src/main/java/org/archive/format/dns/DNSResponseParser.java
+++ b/src/main/java/org/archive/format/dns/DNSResponseParser.java
@@ -5,6 +5,7 @@
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
+import java.nio.charset.StandardCharsets;
public class DNSResponseParser {
@@ -28,7 +29,7 @@ public void parse(InputStream is, DNSResponse response) throws IOException, DNSP
try {
// TODO: should we wrap in a CountingInputStream and indicate
// observed octet-length?
- BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"));
+ BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8));
String date = br.readLine().trim();
if(isDate(date)) {
response.setDate(date);
diff --git a/src/main/java/org/archive/format/gzip/GZIPMemberSeries.java b/src/main/java/org/archive/format/gzip/GZIPMemberSeries.java
index d70bf394..154cf5f1 100644
--- a/src/main/java/org/archive/format/gzip/GZIPMemberSeries.java
+++ b/src/main/java/org/archive/format/gzip/GZIPMemberSeries.java
@@ -2,6 +2,7 @@
import java.io.IOException;
import java.io.InputStream;
+import java.util.Locale;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.zip.Inflater;
@@ -227,7 +228,7 @@ public GZIPSeriesMember getNextMember() throws GZIPFormatException, IOException
}
if(LOG.isLoggable(Level.INFO)) {
- LOG.info(String.format(
+ LOG.info(String.format(Locale.ROOT,
"Got EOF after %d bytes before finding magic in %s\n",
amtSkipped * -1, streamContext));
}
@@ -237,7 +238,7 @@ public GZIPSeriesMember getNextMember() throws GZIPFormatException, IOException
if(amtSkipped > 0) {
if(strict) {
if(state == STATE_START) {
- LOG.info(String.format(
+ LOG.info(String.format(Locale.ROOT,
"Strict mode Skipped %d bytes in (%s) before finding magic at offset(%d)\n",
amtSkipped, streamContext, offset-3));
} else {
@@ -248,7 +249,7 @@ public GZIPSeriesMember getNextMember() throws GZIPFormatException, IOException
}
if(LOG.isLoggable(Level.INFO)) {
- LOG.info(String.format(
+ LOG.info(String.format(Locale.ROOT,
"Skipped %d bytes in (%s) before finding magic at offset(%d)\n",
amtSkipped, streamContext, offset-3));
}
@@ -268,7 +269,7 @@ public GZIPSeriesMember getNextMember() throws GZIPFormatException, IOException
}
offset = currentMemberStartOffset + 3;
stream.setOffset(currentMemberStartOffset + 3);
- LOG.warning(String.format(
+ LOG.warning(String.format(Locale.ROOT,
"GZIPFormatException with record around offset(%d) in (%s)\n",
offset, streamContext));
}
diff --git a/src/main/java/org/archive/format/gzip/zipnum/ZipNumCluster.java b/src/main/java/org/archive/format/gzip/zipnum/ZipNumCluster.java
index a3d34a4b..0a3fa1bf 100644
--- a/src/main/java/org/archive/format/gzip/zipnum/ZipNumCluster.java
+++ b/src/main/java/org/archive/format/gzip/zipnum/ZipNumCluster.java
@@ -11,8 +11,9 @@
*/
import java.io.BufferedReader;
-import java.io.FileReader;
+import java.io.FileInputStream;
import java.io.IOException;
+import java.io.InputStreamReader;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
@@ -35,6 +36,8 @@
import org.archive.util.binsearch.impl.HTTPSeekableLineReader;
import org.archive.util.iterator.CloseableIterator;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
public class ZipNumCluster extends ZipNumIndex {
final static Logger LOGGER = Logger.getLogger(ZipNumCluster.class.getName());
@@ -367,7 +370,7 @@ protected void loadLastBlockSizes(String filename)
totalAdjustment = 0;
try {
- reader = new BufferedReader(new FileReader(filename));
+ reader = new BufferedReader(new InputStreamReader(new FileInputStream(filename), UTF_8));
while ((line = reader.readLine()) != null) {
String[] splits = line.split("\t");
diff --git a/src/main/java/org/archive/format/gzip/zipnum/ZipNumWriter.java b/src/main/java/org/archive/format/gzip/zipnum/ZipNumWriter.java
index a104244a..c0e4e01d 100644
--- a/src/main/java/org/archive/format/gzip/zipnum/ZipNumWriter.java
+++ b/src/main/java/org/archive/format/gzip/zipnum/ZipNumWriter.java
@@ -3,18 +3,18 @@
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
-import java.nio.charset.Charset;
import org.archive.format.gzip.GZIPMemberWriter;
import org.archive.format.gzip.GZIPMemberWriterCommittedOutputStream;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
public class ZipNumWriter extends GZIPMemberWriterCommittedOutputStream {
int limit;
int count;
OutputStream manifestOut;
ByteArrayOutputStream manifestBuffer;
char delimiter = '\t';
- private static final Charset UTF8 = Charset.forName("utf-8");
public ZipNumWriter(OutputStream main, OutputStream manifest, int limit) {
super(new GZIPMemberWriter(main));
manifestOut = manifest;
@@ -51,7 +51,7 @@ private void finishCurrent() throws IOException {
sb.append(delimiter);
sb.append(len);
sb.append(delimiter);
- manifestOut.write(sb.toString().getBytes(UTF8));
+ manifestOut.write(sb.toString().getBytes(UTF_8));
manifestBuffer.writeTo(manifestOut);
manifestOut.flush();
count = 0;
diff --git a/src/main/java/org/archive/format/http/DumpingHTTPParseObserver.java b/src/main/java/org/archive/format/http/DumpingHTTPParseObserver.java
index ed5dfcb2..f1ac16c6 100755
--- a/src/main/java/org/archive/format/http/DumpingHTTPParseObserver.java
+++ b/src/main/java/org/archive/format/http/DumpingHTTPParseObserver.java
@@ -1,10 +1,10 @@
package org.archive.format.http;
import java.io.PrintStream;
-import java.nio.charset.Charset;
+import java.util.Locale;
+
public class DumpingHTTPParseObserver implements HttpHeaderObserver {
- private static final Charset UTF8 = Charset.forName("UTF-8");
private PrintStream ps = null;
public DumpingHTTPParseObserver() {
ps = System.out;
@@ -15,13 +15,13 @@ public DumpingHTTPParseObserver(PrintStream ps) {
public void headerParsed(byte[] name, int ns, int nl, byte[] value, int vs,
int vl) {
- ps.format("headerParsed:(%d:%d)(%s)(%d:%d)(%s)\n",
+ ps.format(Locale.ROOT,"headerParsed:(%d:%d)(%s)(%d:%d)(%s)\n",
ns,nl,new String(name,0,nl,UTF8),
vs,vl,new String(value,0,vl,UTF8));
}
public void headersComplete(int bytesRead) {
- ps.format("headersComplete(%d)\n",bytesRead);
+ ps.format(Locale.ROOT,"headersComplete(%d)\n",bytesRead);
}
public void headersCorrupt() {
ps.println("headersCorrupted\n");
diff --git a/src/main/java/org/archive/format/http/HttpConstants.java b/src/main/java/org/archive/format/http/HttpConstants.java
index fa0a7e10..8ae4d4db 100755
--- a/src/main/java/org/archive/format/http/HttpConstants.java
+++ b/src/main/java/org/archive/format/http/HttpConstants.java
@@ -1,9 +1,10 @@
package org.archive.format.http;
import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
public interface HttpConstants {
- public static final Charset UTF8 = Charset.forName("UTF-8");
+ public static final Charset UTF8 = StandardCharsets.UTF_8;
public static final byte CR = 13;
public static final byte LF = 10;
public static final byte SP = 32;
diff --git a/src/main/java/org/archive/format/http/HttpHeader.java b/src/main/java/org/archive/format/http/HttpHeader.java
index 57b70e1f..9ebe860f 100755
--- a/src/main/java/org/archive/format/http/HttpHeader.java
+++ b/src/main/java/org/archive/format/http/HttpHeader.java
@@ -2,6 +2,7 @@
import java.io.IOException;
import java.io.OutputStream;
+import java.util.Locale;
public class HttpHeader implements HttpConstants {
private String name = null;
@@ -27,7 +28,7 @@ public void write(OutputStream out) throws IOException {
public String toString() {
StringBuilder sb = new StringBuilder(name.length() + value.length()+20);
- sb.append(String.format("HttpHeader(%s)(%s)",name,value));
+ sb.append(String.format(Locale.ROOT, "HttpHeader(%s)(%s)",name,value));
return sb.toString();
}
}
diff --git a/src/main/java/org/archive/format/http/HttpHeaderParser.java b/src/main/java/org/archive/format/http/HttpHeaderParser.java
index bee3c28b..ddbb6e47 100755
--- a/src/main/java/org/archive/format/http/HttpHeaderParser.java
+++ b/src/main/java/org/archive/format/http/HttpHeaderParser.java
@@ -2,6 +2,7 @@
import java.io.IOException;
import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
public class HttpHeaderParser implements HttpConstants {
private static final int DEFAULT_MAX_NAME_LENGTH = 1024 * 100;
@@ -288,7 +289,8 @@ public ParseState handleByte(byte b, HttpHeaderParser parser)
return parser.postColonState;
}
if(parser.isStrict) {
- throw new HttpParseException("Illegal char after name("+new String(name,0,nameLength)+")");
+ throw new HttpParseException("Illegal char after name("
+ + new String(name, 0, nameLength, StandardCharsets.ISO_8859_1) + ")");
}
parser.headersCorrupted();
return parser.laxLineEatParseState;
diff --git a/src/main/java/org/archive/format/http/HttpHeaders.java b/src/main/java/org/archive/format/http/HttpHeaders.java
index ed8061d7..a65dd8fb 100755
--- a/src/main/java/org/archive/format/http/HttpHeaders.java
+++ b/src/main/java/org/archive/format/http/HttpHeaders.java
@@ -4,6 +4,7 @@
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Date;
+import java.util.Locale;
import java.util.logging.Logger;
import org.archive.util.ByteOp;
@@ -54,9 +55,9 @@ public String getValue(String name) {
}
public String getValueCaseInsensitive(String name) {
- String lc = name.toLowerCase();
+ String lc = name.toLowerCase(Locale.ROOT);
for(HttpHeader h : this) {
- if(h.getName().toLowerCase().equals(lc)) {
+ if(h.getName().toLowerCase(Locale.ROOT).equals(lc)) {
return h.getValue();
}
}
diff --git a/src/main/java/org/archive/format/http/HttpMessageParser.java b/src/main/java/org/archive/format/http/HttpMessageParser.java
index c4fcdf92..24e59e03 100644
--- a/src/main/java/org/archive/format/http/HttpMessageParser.java
+++ b/src/main/java/org/archive/format/http/HttpMessageParser.java
@@ -1,5 +1,6 @@
package org.archive.format.http;
+import java.util.Locale;
public class HttpMessageParser implements HttpConstants {
@@ -22,11 +23,11 @@ protected int parseVersionLax(byte buf[], int start, int len)
throws HttpParseException {
String v = new String(buf,start,len,UTF8);
- if(v.toLowerCase().compareTo(VERSION_0_STATUS.toLowerCase()) == 0) {
+ if(v.toLowerCase(Locale.ROOT).compareTo(VERSION_0_STATUS.toLowerCase(Locale.ROOT)) == 0) {
return VERSION_0;
- } else if(v.toLowerCase().compareTo(VERSION_1_STATUS.toLowerCase()) == 0) {
+ } else if(v.toLowerCase(Locale.ROOT).compareTo(VERSION_1_STATUS.toLowerCase(Locale.ROOT)) == 0) {
return VERSION_1;
- } else if(v.toLowerCase().compareTo(VERSION_9_STATUS.toLowerCase()) == 0) {
+ } else if(v.toLowerCase(Locale.ROOT).compareTo(VERSION_9_STATUS.toLowerCase(Locale.ROOT)) == 0) {
return VERSION_9;
}
return VERSION_0;
diff --git a/src/main/java/org/archive/format/http/HttpRequestMessageParser.java b/src/main/java/org/archive/format/http/HttpRequestMessageParser.java
index f7bc43c7..759bbe5d 100644
--- a/src/main/java/org/archive/format/http/HttpRequestMessageParser.java
+++ b/src/main/java/org/archive/format/http/HttpRequestMessageParser.java
@@ -2,6 +2,7 @@
import java.io.IOException;
import java.io.InputStream;
+import java.util.Locale;
public class HttpRequestMessageParser extends HttpMessageParser {
public int maxBytes = 1024 * 1024;
@@ -223,7 +224,7 @@ protected int parseMethodStrict(byte buf[], int start, int len)
protected int parseMethodLax(byte buf[], int start, int len)
throws HttpParseException {
- String v = new String(buf,start,len,UTF8).toUpperCase();
+ String v = new String(buf,start,len,UTF8).toUpperCase(Locale.ROOT);
if(v.compareTo(METHOD_GET_STRING) == 0) {
return METHOD_GET;
} else if(v.compareTo(METHOD_HEAD_STRING) == 0) {
diff --git a/src/main/java/org/archive/format/http/HttpResponseMessage.java b/src/main/java/org/archive/format/http/HttpResponseMessage.java
index 0cb7b7e5..6d3f5c35 100755
--- a/src/main/java/org/archive/format/http/HttpResponseMessage.java
+++ b/src/main/java/org/archive/format/http/HttpResponseMessage.java
@@ -1,5 +1,7 @@
package org.archive.format.http;
+import java.util.Locale;
+
public class HttpResponseMessage extends HttpMessage implements HttpResponseMessageObserver {
private int status = 0;
private String reason = null;
@@ -20,10 +22,10 @@ public String getReason() {
return reason;
}
public String toString() {
- return String.format("%s %d %s%s", getVersionString(), status, reason, CRLF);
+ return String.format(Locale.ROOT, "%s %d %s%s", getVersionString(), status, reason, CRLF);
}
public String toDebugString() {
- return String.format("Message(%d):(%s) (%d) (%s)\n",
- reason.length(),getVersionString(),status,reason,CRLF);
+ return String.format(Locale.ROOT, "Message(%d):(%s) (%d) (%s)\n", // reason may still be null
+ (reason == null) ? 0 : reason.length(),getVersionString(),status,reason);
}
diff --git a/src/main/java/org/archive/format/http/HttpResponseMessageParser.java b/src/main/java/org/archive/format/http/HttpResponseMessageParser.java
index 3aee7c48..4ddef2ad 100755
--- a/src/main/java/org/archive/format/http/HttpResponseMessageParser.java
+++ b/src/main/java/org/archive/format/http/HttpResponseMessageParser.java
@@ -2,6 +2,7 @@
import java.io.IOException;
import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
public class HttpResponseMessageParser extends HttpMessageParser {
public int maxBytes = 1024 * 128;
@@ -97,7 +98,7 @@ public int parseStrict(byte buf[], int len, HttpResponseMessageObserver obs)
version = parseVersionStrict(buf, vs, vl);
status = parseStatusStrict(buf,ss,sl);
- reason = new String(buf,idx+1,(len - idx)-1);
+ reason = new String(buf,idx+1,(len - idx)-1,StandardCharsets.ISO_8859_1);
obs.messageParsed(version, status, reason, len);
@@ -155,7 +156,7 @@ private int parseLax(byte buf[], int len, HttpResponseMessageObserver obs)
idx++;
int reasonLen = bufferEnd - idx;
if(reasonLen > 0) {
- reason = new String(buf,idx,reasonLen);
+ reason = new String(buf,idx,reasonLen,StandardCharsets.ISO_8859_1);
}
} else {
// missed some:
diff --git a/src/main/java/org/archive/format/json/CrossProductOfLists.java b/src/main/java/org/archive/format/json/CrossProductOfLists.java
index f9e2abd2..69cdae33 100644
--- a/src/main/java/org/archive/format/json/CrossProductOfLists.java
+++ b/src/main/java/org/archive/format/json/CrossProductOfLists.java
@@ -4,6 +4,7 @@
import java.util.ArrayList;
import java.util.Deque;
import java.util.List;
+import java.util.Locale;
import java.util.Stack;
import java.util.logging.Level;
import java.util.logging.Logger;
@@ -18,12 +19,12 @@ public List> crossProduct(List>> listOfLists) {
if(LOG.isLoggable(Level.INFO)) {
int count = listOfLists.size();
- LOG.info(String.format("Total of (%d) lists to cross product",count));
+ LOG.info(String.format(Locale.ROOT, "Total of (%d) lists to cross product",count));
for(int i = 0; i < count; i++) {
- LOG.info(String.format("Field (%d) is (%d) deep",i,listOfLists.get(i).size()));
+ LOG.info(String.format(Locale.ROOT, "Field (%d) is (%d) deep",i,listOfLists.get(i).size()));
for(List inner : listOfLists.get(i)) {
LOG.info(
- String.format("----(%d):(%s)"
+ String.format(Locale.ROOT, "----(%d):(%s)"
,i,StringUtils.join(inner.toArray(),",") ) );
}
}
diff --git a/src/main/java/org/archive/format/json/JSONView.java b/src/main/java/org/archive/format/json/JSONView.java
index 7a984ebe..444ea7e6 100644
--- a/src/main/java/org/archive/format/json/JSONView.java
+++ b/src/main/java/org/archive/format/json/JSONView.java
@@ -2,6 +2,7 @@
import java.util.ArrayList;
import java.util.List;
+import java.util.Locale;
import java.util.logging.Level;
import java.util.logging.Logger;
@@ -28,7 +29,7 @@ public class JSONView {
public JSONView(String... pathSpecs) {
this.pathSpecs = new ArrayList(pathSpecs.length);
if(LOG.isLoggable(Level.INFO)) {
- LOG.info(String.format("Creating JSONView with(%s)",
+ LOG.info(String.format(Locale.ROOT, "Creating JSONView with(%s)",
StringUtils.join(pathSpecs,",")));
}
for(String pathSpec : pathSpecs) {
diff --git a/src/main/java/org/archive/format/text/charset/CharsetDetector.java b/src/main/java/org/archive/format/text/charset/CharsetDetector.java
index 214fde07..08aac469 100644
--- a/src/main/java/org/archive/format/text/charset/CharsetDetector.java
+++ b/src/main/java/org/archive/format/text/charset/CharsetDetector.java
@@ -22,6 +22,8 @@
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
+import java.nio.charset.StandardCharsets;
+import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -91,7 +93,7 @@ public abstract class CharsetDetector {
// ...and if the chardet library fails, use the Content-Type header
protected final static String HTTP_CONTENT_TYPE_HEADER = "CONTENT-TYPE";
/** the default charset name to use when giving up */
- public final static String DEFAULT_CHARSET = "UTF-8";
+ public final static String DEFAULT_CHARSET = StandardCharsets.UTF_8.name();
protected boolean isCharsetSupported(String charsetName) {
// can you believe that this throws a runtime? Just asking if it's
@@ -106,7 +108,7 @@ protected boolean isCharsetSupported(String charsetName) {
}
}
protected String mapCharset(String orig) {
- String lc = orig.toLowerCase();
+ String lc = orig.toLowerCase(Locale.ROOT);
if(lc.contains("iso8859-1") || lc.contains("iso-8859-1")) {
return "cp1252";
}
@@ -114,7 +116,7 @@ protected String mapCharset(String orig) {
}
protected String contentTypeToCharset(final String contentType) {
int offset =
- contentType.toUpperCase().indexOf(CHARSET_TOKEN.toUpperCase());
+ contentType.toUpperCase(Locale.ROOT).indexOf(CHARSET_TOKEN.toUpperCase(Locale.ROOT));
if (offset != -1) {
String cs = contentType.substring(offset + CHARSET_TOKEN.length());
@@ -148,7 +150,7 @@ protected String getCharsetFromHeaders(HttpHeaders headers)
return null;
}
for(HttpHeader header : headers) {
- if(header.getName().toUpperCase().trim().equals(
+ if(header.getName().toUpperCase(Locale.ROOT).trim().equals(
HTTP_CONTENT_TYPE_HEADER)) {
return contentTypeToCharset(header.getValue());
}
diff --git a/src/main/java/org/archive/format/text/html/NodeUtils.java b/src/main/java/org/archive/format/text/html/NodeUtils.java
index 625d9099..f231b91a 100644
--- a/src/main/java/org/archive/format/text/html/NodeUtils.java
+++ b/src/main/java/org/archive/format/text/html/NodeUtils.java
@@ -19,6 +19,8 @@
*/
package org.archive.format.text.html;
+import java.util.Locale;
+
import org.htmlparser.Node;
import org.htmlparser.nodes.RemarkNode;
import org.htmlparser.nodes.TagNode;
@@ -41,7 +43,7 @@ public static boolean isTagNodeNamed(Node node, String name) {
if(isTagNode(node)) {
TagNode tagNode = (TagNode) node;
String nodeName = tagNode.getTagName();
- return nodeName.equals(name.toUpperCase());
+ return nodeName.equals(name.toUpperCase(Locale.ROOT));
}
return false;
}
@@ -50,7 +52,7 @@ public static boolean isOpenTagNodeNamed(Node node, String name) {
TagNode tagNode = (TagNode) node;
if(!tagNode.isEndTag()) {
String nodeName = tagNode.getTagName();
- return nodeName.equals(name.toUpperCase());
+ return nodeName.equals(name.toUpperCase(Locale.ROOT));
}
}
return false;
@@ -60,7 +62,7 @@ public static boolean isNonEmptyOpenTagNodeNamed(Node node, String name) {
TagNode tagNode = (TagNode) node;
if(!tagNode.isEndTag() && !tagNode.isEmptyXmlTag()) {
String nodeName = tagNode.getTagName();
- return nodeName.equals(name.toUpperCase());
+ return nodeName.equals(name.toUpperCase(Locale.ROOT));
}
}
return false;
@@ -70,7 +72,7 @@ public static boolean isCloseTagNodeNamed(Node node, String name) {
TagNode tagNode = (TagNode) node;
if(tagNode.isEndTag()) {
String nodeName = tagNode.getTagName();
- return nodeName.equals(name.toUpperCase());
+ return nodeName.equals(name.toUpperCase(Locale.ROOT));
}
}
return false;
diff --git a/src/main/java/org/archive/format/warc/WARCConstants.java b/src/main/java/org/archive/format/warc/WARCConstants.java
index 72dad45a..a6bdb3f4 100644
--- a/src/main/java/org/archive/format/warc/WARCConstants.java
+++ b/src/main/java/org/archive/format/warc/WARCConstants.java
@@ -19,6 +19,8 @@
package org.archive.format.warc;
+import java.nio.charset.StandardCharsets;
+
import org.archive.format.ArchiveFileConstants;
/**
@@ -93,7 +95,7 @@ public interface WARCConstants extends ArchiveFileConstants {
* till we figure it, DEFAULT_ENCODING is single-byte charset -- same as
* ARCs.
*/
- public static final String DEFAULT_ENCODING = "UTF-8";
+ public static final String DEFAULT_ENCODING = StandardCharsets.UTF_8.name();
public static final String HEADER_LINE_ENCODING = DEFAULT_ENCODING;
// TODO: Revisit. 8859 isn't correct, especially if we settle on RFC822
diff --git a/src/main/java/org/archive/hadoop/ArchiveMetadataLoader.java b/src/main/java/org/archive/hadoop/ArchiveMetadataLoader.java
index 37c8af99..a3cbb26c 100644
--- a/src/main/java/org/archive/hadoop/ArchiveMetadataLoader.java
+++ b/src/main/java/org/archive/hadoop/ArchiveMetadataLoader.java
@@ -2,6 +2,7 @@
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Locale;
import java.util.logging.Logger;
import org.apache.hadoop.mapreduce.InputFormat;
@@ -54,7 +55,7 @@ public Tuple getNext() throws IOException {
try {
key = reader.getCurrentKey();
- LOG.info(String.format("Loaded key-offset %d\n", key.offset));
+ LOG.info(String.format(Locale.ROOT, "Loaded key-offset %d\n", key.offset));
value = reader.getCurrentValue();
} catch (InterruptedException e) {
// is this needed and the right way?
diff --git a/src/main/java/org/archive/hadoop/FilenameInputFormat.java b/src/main/java/org/archive/hadoop/FilenameInputFormat.java
index 5893afb1..3f41cdee 100644
--- a/src/main/java/org/archive/hadoop/FilenameInputFormat.java
+++ b/src/main/java/org/archive/hadoop/FilenameInputFormat.java
@@ -17,7 +17,6 @@
package org.archive.hadoop;
import java.io.*;
-import java.util.*;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
diff --git a/src/main/java/org/archive/hadoop/PerMapOutputFormat.java b/src/main/java/org/archive/hadoop/PerMapOutputFormat.java
index 28ebca73..684202bb 100644
--- a/src/main/java/org/archive/hadoop/PerMapOutputFormat.java
+++ b/src/main/java/org/archive/hadoop/PerMapOutputFormat.java
@@ -17,7 +17,6 @@
package org.archive.hadoop;
import java.io.*;
-import java.util.*;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
diff --git a/src/main/java/org/archive/hadoop/ResourceRecordReader.java b/src/main/java/org/archive/hadoop/ResourceRecordReader.java
index 06d3ce2e..88b93dd2 100644
--- a/src/main/java/org/archive/hadoop/ResourceRecordReader.java
+++ b/src/main/java/org/archive/hadoop/ResourceRecordReader.java
@@ -1,6 +1,7 @@
package org.archive.hadoop;
import java.io.IOException;
+import java.util.Locale;
import java.util.logging.Logger;
import org.apache.hadoop.fs.FSDataInputStream;
@@ -111,7 +112,7 @@ public boolean nextKeyValue() throws IOException, InterruptedException {
if(r != null) {
StreamCopy.readToEOF(r.getInputStream());
- LOG.info(String.format("Extracted offset %d\n",
+ LOG.info(String.format(Locale.ROOT, "Extracted offset %d\n",
series.getCurrentMemberStartOffset()));
cachedK = new ResourceContext(name,
series.getCurrentMemberStartOffset());
@@ -121,7 +122,7 @@ public boolean nextKeyValue() throws IOException, InterruptedException {
} catch (ResourceParseException e) {
e.printStackTrace();
throw new IOException(
- String.format("ResourceParseException at(%s)(%d)",
+ String.format(Locale.ROOT, "ResourceParseException at(%s)(%d)",
name,series.getCurrentMemberStartOffset()),
e);
}
diff --git a/src/main/java/org/archive/io/ArchiveReader.java b/src/main/java/org/archive/io/ArchiveReader.java
index 449cdc24..070455a5 100644
--- a/src/main/java/org/archive/io/ArchiveReader.java
+++ b/src/main/java/org/archive/io/ArchiveReader.java
@@ -26,12 +26,14 @@
import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
-import java.io.FileWriter;
+import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
+import java.util.Locale;
import java.util.logging.Level;
import java.util.logging.Logger;
@@ -44,6 +46,8 @@
import static org.archive.format.ArchiveFileConstants.*;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
/**
* Reader for an Archive file of Archive {@link ArchiveRecord}s.
@@ -615,7 +619,7 @@ protected static boolean getTrueOrFalse(final String value) {
if (value == null || value.length() <= 0) {
return false;
}
- return Boolean.TRUE.toString().equals(value.toLowerCase());
+ return Boolean.TRUE.toString().equals(value.toLowerCase(Locale.ROOT));
}
/**
@@ -659,7 +663,7 @@ protected void cdxOutput(boolean toFile)
DOT_COMPRESSED_FILE_EXTENSION);
cdxFilename = stripExtension(cdxFilename, getDotFileExtension());
cdxFilename += ('.' + CDX);
- cdxWriter = new BufferedWriter(new FileWriter(cdxFilename));
+ cdxWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(cdxFilename), UTF_8));
}
String header = "CDX b e a m s c " + ((isCompressed()) ? "V" : "v")
@@ -757,4 +761,4 @@ protected static Options getOptions() {
"'or 'nohead'. Default: 'cdx'."));
return options;
}
-}
\ No newline at end of file
+}
diff --git a/src/main/java/org/archive/io/ArchiveReaderFactory.java b/src/main/java/org/archive/io/ArchiveReaderFactory.java
index bc316893..fe72236b 100644
--- a/src/main/java/org/archive/io/ArchiveReaderFactory.java
+++ b/src/main/java/org/archive/io/ArchiveReaderFactory.java
@@ -25,6 +25,7 @@
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
+import java.util.Locale;
import org.archive.io.arc.ARCReaderFactory;
import org.archive.io.warc.WARCReaderFactory;
@@ -296,7 +297,7 @@ protected void addUserAgent(final HttpURLConnection connection) {
* @throws IOException
*/
protected boolean isCompressed(final File f) throws IOException {
- return f.getName().toLowerCase().
+ return f.getName().toLowerCase(Locale.ROOT).
endsWith(DOT_COMPRESSED_FILE_EXTENSION);
}
-}
\ No newline at end of file
+}
diff --git a/src/main/java/org/archive/io/ArchiveRecord.java b/src/main/java/org/archive/io/ArchiveRecord.java
index 4bd1fa02..01e8d5ec 100644
--- a/src/main/java/org/archive/io/ArchiveRecord.java
+++ b/src/main/java/org/archive/io/ArchiveRecord.java
@@ -23,6 +23,7 @@
import java.io.OutputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
+import java.util.Locale;
import java.util.logging.Level;
import org.archive.format.ArchiveFileConstants;
@@ -393,7 +394,7 @@ public boolean hasContentHeaders() {
return false;
}
- if (!url.toLowerCase().startsWith("http")) {
+ if (!url.toLowerCase(Locale.ROOT).startsWith("http")) {
return false;
}
diff --git a/src/main/java/org/archive/io/CompositeFileReader.java b/src/main/java/org/archive/io/CompositeFileReader.java
index 14b56219..6e331565 100644
--- a/src/main/java/org/archive/io/CompositeFileReader.java
+++ b/src/main/java/org/archive/io/CompositeFileReader.java
@@ -23,6 +23,8 @@
import java.io.InputStreamReader;
import java.util.List;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
/**
* @author gojomo
@@ -34,7 +36,7 @@ public class CompositeFileReader extends InputStreamReader {
* @throws IOException
*/
public CompositeFileReader(List filenames) throws IOException {
- super(new CompositeFileInputStream(filenames));
+ super(new CompositeFileInputStream(filenames), UTF_8);
}
}
diff --git a/src/main/java/org/archive/io/GenericReplayCharSequence.java b/src/main/java/org/archive/io/GenericReplayCharSequence.java
index c427550b..ff96717c 100644
--- a/src/main/java/org/archive/io/GenericReplayCharSequence.java
+++ b/src/main/java/org/archive/io/GenericReplayCharSequence.java
@@ -33,14 +33,15 @@
import java.nio.channels.FileChannel;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
import java.text.NumberFormat;
+import java.util.Locale;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.commons.io.IOUtils;
import org.archive.util.DevUtils;
-import com.google.common.base.Charsets;
import com.google.common.primitives.Ints;
/**
@@ -67,7 +68,7 @@ public class GenericReplayCharSequence implements ReplayCharSequence {
*
* See Encoding.
*/
- public static final Charset WRITE_ENCODING = Charsets.UTF_16BE;
+ public static final Charset WRITE_ENCODING = StandardCharsets.UTF_16BE;
private static final long MAP_MAX_BYTES = 64 * 1024 * 1024; // 64M
@@ -168,8 +169,8 @@ private void updateMemoryMappedBuffer() {
long charLength = (long) this.length() - (long) prefixBuffer.limit(); // in characters
long mapSize = Math.min((charLength * bytesPerChar) - mapByteOffset, MAP_MAX_BYTES);
logger.fine("updateMemoryMappedBuffer: mapOffset="
- + NumberFormat.getInstance().format(mapByteOffset)
- + " mapSize=" + NumberFormat.getInstance().format(mapSize));
+ + NumberFormat.getInstance(Locale.ROOT).format(mapByteOffset)
+ + " mapSize=" + NumberFormat.getInstance(Locale.ROOT).format(mapSize));
try {
// TODO: stress-test without these possibly-costly requests!
// System.gc();
@@ -255,9 +256,9 @@ protected void decode(InputStream inStream, int prefixMax,
this.length = Ints.saturatedCast(count);
if(count>Integer.MAX_VALUE) {
logger.warning("input stream is longer than Integer.MAX_VALUE="
- + NumberFormat.getInstance().format(Integer.MAX_VALUE)
+ + NumberFormat.getInstance(Locale.ROOT).format(Integer.MAX_VALUE)
+ " characters -- only first "
- + NumberFormat.getInstance().format(Integer.MAX_VALUE)
+ + NumberFormat.getInstance(Locale.ROOT).format(Integer.MAX_VALUE)
+ " are accessible through this GenericReplayCharSequence");
}
diff --git a/src/main/java/org/archive/io/HeaderedArchiveRecord.java b/src/main/java/org/archive/io/HeaderedArchiveRecord.java
index 809a9e54..858edb4d 100644
--- a/src/main/java/org/archive/io/HeaderedArchiveRecord.java
+++ b/src/main/java/org/archive/io/HeaderedArchiveRecord.java
@@ -25,6 +25,8 @@
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintStream;
+import java.nio.charset.StandardCharsets;
+import java.util.Locale;
import org.archive.format.http.HttpHeader;
import org.archive.format.arc.ARCConstants;
@@ -144,20 +146,17 @@ private InputStream readContentHeaders() throws IOException {
int eolCharCount = getEolCharsCount(statusBytes);
if (eolCharCount <= 0) {
throw new IOException("Failed to read raw lie where one " +
- " was expected: " + new String(statusBytes));
+ " was expected: " + new String(statusBytes, ARCConstants.DEFAULT_ENCODING));
}
String statusLine = new String(statusBytes, 0,
statusBytes.length - eolCharCount, ARCConstants.DEFAULT_ENCODING);
- if (statusLine == null) {
- throw new NullPointerException("Expected status line is null");
- }
statusLine = statusLine.trim();
// TODO: Tighten up this test.
boolean isHttpResponse = statusLine.startsWith("HTTP");
boolean isHttpRequest = false;
if (!isHttpResponse) {
- isHttpRequest = statusLine.toUpperCase().startsWith("GET") ||
- !statusLine.toUpperCase().startsWith("POST");
+ isHttpRequest = statusLine.toUpperCase(Locale.ROOT).startsWith("GET") ||
+ !statusLine.toUpperCase(Locale.ROOT).startsWith("POST");
}
if (!isHttpResponse && !isHttpRequest) {
throw new UnexpectedStartLineIOException("Failed parse of " +
@@ -185,7 +184,7 @@ private InputStream readContentHeaders() throws IOException {
eolCharCount = getEolCharsCount(lineBytes);
if (eolCharCount <= 0) {
throw new IOException("Failed reading headers: " +
- ((lineBytes != null)? new String(lineBytes): null));
+ ((lineBytes != null)? new String(lineBytes, StandardCharsets.ISO_8859_1): null));
}
// Save the bytes read.
baos.write(lineBytes);
diff --git a/src/main/java/org/archive/io/ReplayCharSequence.java b/src/main/java/org/archive/io/ReplayCharSequence.java
index e456e293..bd74f2f8 100644
--- a/src/main/java/org/archive/io/ReplayCharSequence.java
+++ b/src/main/java/org/archive/io/ReplayCharSequence.java
@@ -23,8 +23,7 @@
import java.io.IOException;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
-
-import com.google.common.base.Charsets;
+import java.nio.charset.StandardCharsets;
/**
@@ -40,7 +39,7 @@ public interface ReplayCharSequence extends CharSequence, Closeable {
/** charset to use in replay when declared value
* is absent/illegal/unavailable */
- public Charset FALLBACK_CHARSET = Charsets.ISO_8859_1; // TODO: should this be UTF-8?
+ public Charset FALLBACK_CHARSET = StandardCharsets.ISO_8859_1; // TODO: should this be UTF-8?
/**
* Call this method when done so implementation has chance to clean up
diff --git a/src/main/java/org/archive/io/UTF8Bytes.java b/src/main/java/org/archive/io/UTF8Bytes.java
index c280b08d..4dc0144b 100644
--- a/src/main/java/org/archive/io/UTF8Bytes.java
+++ b/src/main/java/org/archive/io/UTF8Bytes.java
@@ -19,6 +19,7 @@
package org.archive.io;
import java.io.UnsupportedEncodingException;
+import java.nio.charset.StandardCharsets;
/**
* Marker Interface for instances that can be serialized as UTF8 bytes.
@@ -27,7 +28,7 @@
* @version $Date$ $Version$
*/
public interface UTF8Bytes {
- public static final String UTF8 = "UTF-8";
+ public static final String UTF8 = StandardCharsets.UTF_8.name();
/**
* @return Instance as UTF-8 bytes.
diff --git a/src/main/java/org/archive/io/WriterPoolMember.java b/src/main/java/org/archive/io/WriterPoolMember.java
index a488354a..5d350534 100644
--- a/src/main/java/org/archive/io/WriterPoolMember.java
+++ b/src/main/java/org/archive/io/WriterPoolMember.java
@@ -25,10 +25,13 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
+import java.nio.charset.StandardCharsets;
import java.text.DecimalFormat;
+import java.text.DecimalFormatSymbols;
import java.text.NumberFormat;
import java.util.Iterator;
import java.util.List;
+import java.util.Locale;
import java.util.Properties;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Logger;
@@ -52,7 +55,7 @@
public abstract class WriterPoolMember {
private final Logger logger = Logger.getLogger(this.getClass().getName());
- public static final String UTF8 = "UTF-8";
+ public static final String UTF8 = StandardCharsets.UTF_8.name();
/**
* Default archival-aggregate filename template.
@@ -103,12 +106,17 @@ public abstract class WriterPoolMember {
*/
protected static int roundRobinIndex = 0;
+ /**
+ * Locale-neutral symbol set for the serial number formatter, so its digits do not depend on the default locale.
+ */
+ protected static final DecimalFormatSymbols serialNoFormatterSymbols = new DecimalFormatSymbols(Locale.ROOT);
+
/**
* NumberFormat instance for formatting serial number.
*
* Pads serial number with zeros.
*/
- protected static NumberFormat serialNoFormatter = new DecimalFormat("00000");
+ protected static NumberFormat serialNoFormatter = new DecimalFormat("00000", serialNoFormatterSymbols);
/**
diff --git a/src/main/java/org/archive/io/arc/ARC2WCDX.java b/src/main/java/org/archive/io/arc/ARC2WCDX.java
index f0515694..aec571e9 100644
--- a/src/main/java/org/archive/io/arc/ARC2WCDX.java
+++ b/src/main/java/org/archive/io/arc/ARC2WCDX.java
@@ -32,6 +32,8 @@
import org.archive.util.ArchiveUtils;
import org.archive.util.SURT;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
/**
* Create a 'Wide' CDX from an ARC. Takes one argument, the path to the ARC.
* Writes .wcdx.gz in same directory.
@@ -61,7 +63,7 @@ public static Object[] createWcdx(ARCReader reader) {
PrintStream writer = null;
long count = 0;
try {
- writer = new PrintStream(new GZIPOutputStream(new FileOutputStream(wcdxFile)));
+ writer = new PrintStream(new GZIPOutputStream(new FileOutputStream(wcdxFile)), false, UTF_8.name());
// write header: legend + timestamp
StringBuilder legend = new StringBuilder();
diff --git a/src/main/java/org/archive/io/arc/ARCReader.java b/src/main/java/org/archive/io/arc/ARCReader.java
index c9a88415..f8935e79 100644
--- a/src/main/java/org/archive/io/arc/ARCReader.java
+++ b/src/main/java/org/archive/io/arc/ARCReader.java
@@ -27,6 +27,7 @@
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
+import java.util.Locale;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Logger;
@@ -447,7 +448,6 @@ public static void createCDXIndexFile(String urlOrPath)
* @throws IOException
* @throws java.text.ParseException
*/
- @SuppressWarnings("unchecked")
public static void main(String [] args)
throws ParseException, IOException, java.text.ParseException {
Options options = getOptions();
@@ -493,7 +493,7 @@ public static void main(String [] args)
break;
case 'f':
- format = cmdlineOptions[i].getValue().toLowerCase();
+ format = cmdlineOptions[i].getValue().toLowerCase(Locale.ROOT);
boolean match = false;
// List of supported formats.
final String [] supportedFormats =
diff --git a/src/main/java/org/archive/io/arc/ARCReaderFactory.java b/src/main/java/org/archive/io/arc/ARCReaderFactory.java
index d2f10842..bbcc8b6f 100644
--- a/src/main/java/org/archive/io/arc/ARCReaderFactory.java
+++ b/src/main/java/org/archive/io/arc/ARCReaderFactory.java
@@ -27,6 +27,7 @@
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Iterator;
+import java.util.Locale;
import java.util.logging.Level;
import org.archive.io.ArchiveReader;
@@ -230,7 +231,7 @@ public static boolean testCompressedARCFile(File arcFile,
throws IOException {
boolean compressedARCFile = false;
FileUtils.assertReadable(arcFile);
- if(!skipSuffixCheck && !arcFile.getName().toLowerCase()
+ if(!skipSuffixCheck && !arcFile.getName().toLowerCase(Locale.ROOT)
.endsWith(COMPRESSED_ARC_FILE_EXTENSION)) {
return compressedARCFile;
}
@@ -247,9 +248,9 @@ public static boolean testCompressedARCFile(File arcFile,
public static boolean isARCSuffix(final String arcName) {
return (arcName == null)?
false:
- (arcName.toLowerCase().endsWith(DOT_COMPRESSED_ARC_FILE_EXTENSION))?
+ (arcName.toLowerCase(Locale.ROOT).endsWith(DOT_COMPRESSED_ARC_FILE_EXTENSION))?
true:
- (arcName.toLowerCase().endsWith(DOT_ARC_FILE_EXTENSION))?
+ (arcName.toLowerCase(Locale.ROOT).endsWith(DOT_ARC_FILE_EXTENSION))?
true: false;
}
@@ -452,4 +453,4 @@ protected void gotoEOR(ArchiveRecord rec) throws IOException {
logStdErr(Level.WARNING, message);
}
}
-}
\ No newline at end of file
+}
diff --git a/src/main/java/org/archive/io/arc/ARCRecord.java b/src/main/java/org/archive/io/arc/ARCRecord.java
index 0815c18a..c14426a5 100644
--- a/src/main/java/org/archive/io/arc/ARCRecord.java
+++ b/src/main/java/org/archive/io/arc/ARCRecord.java
@@ -27,6 +27,7 @@
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
+import java.util.Locale;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
@@ -376,7 +377,7 @@ private ARCRecordMetaData computeMetaData(List keys,
if (keys.size() != values.size()) {
// Early ARCs had a space in mimetype.
if (values.size() == (keys.size() + 1) &&
- values.get(4).toLowerCase().startsWith("charset=")) {
+ values.get(4).toLowerCase(Locale.ROOT).startsWith("charset=")) {
List nuvalues =
new ArrayList(keys.size());
nuvalues.add(0, values.get(0));
@@ -588,7 +589,7 @@ private InputStream readHttpHeader() throws IOException {
if (eolCharCount <= 0) {
throw new RecoverableIOException(
"Failed to read http status where one was expected: "
- + ((statusBytes == null) ? "" : new String(statusBytes)));
+ + ((statusBytes == null) ? "" : new String(statusBytes, DEFAULT_ENCODING)));
}
statusLine = new String(statusBytes, 0,
@@ -658,7 +659,7 @@ private InputStream readHttpHeader() throws IOException {
break;
} else {
throw new IOException("Failed reading http headers: " +
- ((lineBytes != null)? new String(lineBytes): null));
+ ((lineBytes != null)? new String(lineBytes, DEFAULT_ENCODING): null));
}
} else {
httpHeaderBytesRead += lineBytes.length;
diff --git a/src/main/java/org/archive/io/arc/ARCUtils.java b/src/main/java/org/archive/io/arc/ARCUtils.java
index 5bcb4cc3..05c15abb 100644
--- a/src/main/java/org/archive/io/arc/ARCUtils.java
+++ b/src/main/java/org/archive/io/arc/ARCUtils.java
@@ -27,6 +27,7 @@
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;
+import java.util.Locale;
import org.archive.url.UsableURI;
import org.archive.util.zip.GzipHeader;
@@ -94,7 +95,7 @@ public static boolean testCompressedARCFile(File arcFile,
throws IOException {
boolean compressedARCFile = false;
isReadable(arcFile);
- if(!skipSuffixCheck && !arcFile.getName().toLowerCase()
+ if(!skipSuffixCheck && !arcFile.getName().toLowerCase(Locale.ROOT)
.endsWith(COMPRESSED_ARC_FILE_EXTENSION)) {
return compressedARCFile;
}
@@ -197,7 +198,7 @@ public static boolean testUncompressedARCFile(File arcFile)
throws IOException {
boolean uncompressedARCFile = false;
isReadable(arcFile);
- if(arcFile.getName().toLowerCase().endsWith(ARC_FILE_EXTENSION)) {
+ if(arcFile.getName().toLowerCase(Locale.ROOT).endsWith(ARC_FILE_EXTENSION)) {
FileInputStream fis = new FileInputStream(arcFile);
try {
byte [] b = new byte[ARC_MAGIC_NUMBER.length()];
diff --git a/src/main/java/org/archive/io/warc/WARCReader.java b/src/main/java/org/archive/io/warc/WARCReader.java
index d33874a3..34583e58 100644
--- a/src/main/java/org/archive/io/warc/WARCReader.java
+++ b/src/main/java/org/archive/io/warc/WARCReader.java
@@ -24,6 +24,7 @@
import java.io.InputStream;
import java.util.Iterator;
import java.util.List;
+import java.util.Locale;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.HelpFormatter;
@@ -198,7 +199,6 @@ public static void main(String [] args)
Options options = getOptions();
PosixParser parser = new PosixParser();
CommandLine cmdline = parser.parse(options, args, false);
- @SuppressWarnings("unchecked")
List cmdlineArgs = cmdline.getArgList();
Option [] cmdlineOptions = cmdline.getOptions();
HelpFormatter formatter = new HelpFormatter();
@@ -233,7 +233,7 @@ public static void main(String [] args)
break;
case 'f':
- format = cmdlineOptions[i].getValue().toLowerCase();
+ format = cmdlineOptions[i].getValue().toLowerCase(Locale.ROOT);
boolean match = false;
// List of supported formats.
final String [] supportedFormats =
@@ -286,4 +286,4 @@ public static void main(String [] args)
}
}
}
-}
\ No newline at end of file
+}
diff --git a/src/main/java/org/archive/io/warc/WARCReaderFactory.java b/src/main/java/org/archive/io/warc/WARCReaderFactory.java
index 881da869..70b80340 100644
--- a/src/main/java/org/archive/io/warc/WARCReaderFactory.java
+++ b/src/main/java/org/archive/io/warc/WARCReaderFactory.java
@@ -26,6 +26,7 @@
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Iterator;
+import java.util.Locale;
import org.archive.io.ArchiveReader;
import org.archive.io.ArchiveReaderFactory;
@@ -307,9 +308,9 @@ protected void gotoEOR(ArchiveRecord rec) throws IOException {
public static boolean isWARCSuffix(final String f) {
return (f == null)?
false:
- (f.toLowerCase().endsWith(DOT_COMPRESSED_WARC_FILE_EXTENSION))?
+ (f.toLowerCase(Locale.ROOT).endsWith(DOT_COMPRESSED_WARC_FILE_EXTENSION))?
true:
- (f.toLowerCase().endsWith(DOT_WARC_FILE_EXTENSION))?
+ (f.toLowerCase(Locale.ROOT).endsWith(DOT_WARC_FILE_EXTENSION))?
true: false;
}
-}
\ No newline at end of file
+}
diff --git a/src/main/java/org/archive/io/warc/WARCWriter.java b/src/main/java/org/archive/io/warc/WARCWriter.java
index 5c6a6854..65eb3346 100644
--- a/src/main/java/org/archive/io/warc/WARCWriter.java
+++ b/src/main/java/org/archive/io/warc/WARCWriter.java
@@ -38,13 +38,14 @@
import org.apache.commons.lang3.StringUtils;
import org.archive.format.ArchiveFileConstants;
-import org.archive.io.UTF8Bytes;
import org.archive.io.WriterPoolMember;
import org.archive.util.ArchiveUtils;
import org.archive.util.anvl.Element;
import static org.archive.format.warc.WARCConstants.*;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
/**
* WARC implementation.
@@ -357,12 +358,12 @@ public URI writeWarcinfoRecord(String filename, final String description)
byte [] warcinfoBody = null;
if (settings.getMetadata() == null) {
// TODO: What to write into a warcinfo? What to associate?
- warcinfoBody = "TODO: Unimplemented".getBytes();
+ warcinfoBody = "TODO: Unimplemented".getBytes(UTF_8);
} else {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
for (final Iterator i = settings.getMetadata().iterator();
i.hasNext();) {
- baos.write(i.next().toString().getBytes(UTF8Bytes.UTF8));
+ baos.write(i.next().toString().getBytes(UTF_8));
}
warcinfoBody = baos.toByteArray();
}
diff --git a/src/main/java/org/archive/net/PublicSuffixes.java b/src/main/java/org/archive/net/PublicSuffixes.java
index e436b8dc..79130332 100644
--- a/src/main/java/org/archive/net/PublicSuffixes.java
+++ b/src/main/java/org/archive/net/PublicSuffixes.java
@@ -22,21 +22,24 @@
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
-import java.io.FileWriter;
+import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
-import java.io.UnsupportedEncodingException;
+import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
+import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.IOUtils;
import org.archive.util.TextUtils;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
/**
* Utility class for making use of the information about 'public suffixes' at
* http://publicsuffix.org.
@@ -189,7 +192,7 @@ public static void main(String args[]) throws IOException {
} else {
is = new FileInputStream(args[0]);
}
- BufferedReader reader = new BufferedReader(new InputStreamReader(is, "UTF-8"));
+ BufferedReader reader = new BufferedReader(new InputStreamReader(is, UTF_8));
String regex = getTopmostAssignedSurtPrefixRegex(reader);
IOUtils.closeQuietly(is);
@@ -197,11 +200,11 @@ public static void main(String args[]) throws IOException {
BufferedWriter writer;
if (args.length >= 2) {
// write to specified file
- writer = new BufferedWriter(new FileWriter(args[1]));
+ writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(args[1]), UTF_8));
needsClose = true;
} else {
// write to stdout
- writer = new BufferedWriter(new OutputStreamWriter(System.out));
+ writer = new BufferedWriter(new OutputStreamWriter(System.out, Charset.defaultCharset()));
}
writer.append(regex);
writer.flush();
@@ -231,7 +234,7 @@ protected static Node readPublishedFileToSurtTrie(BufferedReader reader) throws
// discard utf8 notation after entry
line = line.split("\\s+")[0];
// TODO: maybe we don't need to create lower-cased String
- line = line.toLowerCase();
+ line = line.toLowerCase(Locale.ROOT);
// SURT-order domain segments
String[] segs = line.split("\\.");
StringBuilder sb = new StringBuilder();
@@ -331,16 +334,11 @@ public static synchronized Pattern getTopmostAssignedSurtPrefixPattern() {
public static synchronized String getTopmostAssignedSurtPrefixRegex() {
if (topmostAssignedSurtPrefixRegex == null) {
// use bundled list
- try {
- BufferedReader reader = new BufferedReader(new InputStreamReader(
- PublicSuffixes.class.getResourceAsStream(
- "/org/archive/effective_tld_names.dat"), "UTF-8"));
- topmostAssignedSurtPrefixRegex = getTopmostAssignedSurtPrefixRegex(reader);
- IOUtils.closeQuietly(reader);
- } catch (UnsupportedEncodingException ex) {
- // should never happen
- throw new RuntimeException(ex);
- }
+ BufferedReader reader = new BufferedReader(new InputStreamReader(
+ PublicSuffixes.class.getResourceAsStream(
+ "/org/archive/effective_tld_names.dat"), UTF_8));
+ topmostAssignedSurtPrefixRegex = getTopmostAssignedSurtPrefixRegex(reader);
+ IOUtils.closeQuietly(reader);
}
return topmostAssignedSurtPrefixRegex;
}
diff --git a/src/main/java/org/archive/resource/generic/GenericResourceProducer.java b/src/main/java/org/archive/resource/generic/GenericResourceProducer.java
index 812a3f0d..b111dc1e 100644
--- a/src/main/java/org/archive/resource/generic/GenericResourceProducer.java
+++ b/src/main/java/org/archive/resource/generic/GenericResourceProducer.java
@@ -1,6 +1,7 @@
package org.archive.resource.generic;
import java.io.IOException;
+import java.util.Locale;
import org.archive.resource.MetaData;
import org.archive.resource.Resource;
@@ -45,6 +46,6 @@ public void close() throws IOException {
stream.close();
}
public String getContext() {
- return String.format("Context(%s)(%d)", name, stream.getOffset());
+ return String.format(Locale.ROOT, "Context(%s)(%d)", name, stream.getOffset());
}
}
diff --git a/src/main/java/org/archive/resource/gzip/GZIPMetaData.java b/src/main/java/org/archive/resource/gzip/GZIPMetaData.java
index 0fc18162..1058b01b 100644
--- a/src/main/java/org/archive/resource/gzip/GZIPMetaData.java
+++ b/src/main/java/org/archive/resource/gzip/GZIPMetaData.java
@@ -15,6 +15,8 @@
import org.json.JSONException;
import org.json.JSONObject;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
public class GZIPMetaData extends MetaData implements ResourceConstants {
private static final Logger LOG = Logger.getLogger(GZIPMetaData.class.getName());
@@ -26,7 +28,7 @@ public void setData(GZIPSeriesMember member) {
GZIPHeader header = member.getHeader();
GZIPStaticHeader staticH = header.getStaticHeader();
if(staticH.isFNameSet()) {
- putString(GZIP_FILENAME,new String(header.getFileName(),"UTF-8"));
+ putString(GZIP_FILENAME, new String(header.getFileName(), UTF_8));
}
if(staticH.isFCommentSet()) {
putLong(GZIP_COMMENT_LENGTH,header.getCommentLength());
@@ -39,7 +41,7 @@ public void setData(GZIPSeriesMember member) {
for(int i = 0; i < records; i++) {
GZIPFExtraRecord rec = header.getRecord(i);
JSONObject recJO = new JSONObject();
- String name = new String(rec.getName(),"UTF-8");
+ String name = new String(rec.getName(), UTF_8);
recJO.put(GZIP_FEXTRA_NAME, name);
if(name.equals("SL") || name.equals("LX")) {
recJO.put(GZIP_FEXTRA_VALUE, ByteOp.bytesToInt(rec.getValue()));
@@ -55,8 +57,6 @@ public void setData(GZIPSeriesMember member) {
putLong(GZIP_INFLATED_CRC,footer.getCRC());
putLong(GZIP_INFLATED_LENGTH,footer.getLength());
- } catch (UnsupportedEncodingException e) {
- LOG.warning(e.getMessage());
} catch (JSONException e) {
LOG.warning(e.getMessage());
}
diff --git a/src/main/java/org/archive/resource/gzip/GZIPResourceContainer.java b/src/main/java/org/archive/resource/gzip/GZIPResourceContainer.java
index 39611ab8..5267a0f9 100644
--- a/src/main/java/org/archive/resource/gzip/GZIPResourceContainer.java
+++ b/src/main/java/org/archive/resource/gzip/GZIPResourceContainer.java
@@ -1,6 +1,7 @@
package org.archive.resource.gzip;
import java.io.IOException;
+import java.util.Locale;
import org.archive.format.gzip.GZIPMemberSeries;
import org.archive.format.gzip.GZIPSeriesMember;
@@ -54,6 +55,6 @@ public void close() throws IOException {
series.close();
}
public String getContext() {
- return String.format("Context(%s)(%d)", series.getStreamContext(), series.getCurrentMemberStartOffset());
+ return String.format(Locale.ROOT, "Context(%s)(%d)", series.getStreamContext(), series.getCurrentMemberStartOffset());
}
}
diff --git a/src/main/java/org/archive/resource/html/HTMLMetaData.java b/src/main/java/org/archive/resource/html/HTMLMetaData.java
index 024d9677..d995cf65 100644
--- a/src/main/java/org/archive/resource/html/HTMLMetaData.java
+++ b/src/main/java/org/archive/resource/html/HTMLMetaData.java
@@ -1,6 +1,7 @@
package org.archive.resource.html;
import java.util.List;
+import java.util.Locale;
import java.util.logging.Logger;
import org.archive.resource.MetaData;
@@ -98,7 +99,7 @@ private void appendObj2(JSONObject o, String arr, String... a) {
} catch(JSONException e) {
try {
- System.err.format("GotErr(%s) JSON(%s)(%s)", e.getMessage(),
+ System.err.format(Locale.ROOT, "GotErr(%s) JSON(%s)(%s)", e.getMessage(),
o.toString(1),a.toString());
} catch (JSONException e1) {
// TODO Auto-generated catch block
diff --git a/src/main/java/org/archive/resource/html/HTMLResourceFactory.java b/src/main/java/org/archive/resource/html/HTMLResourceFactory.java
index 6e95270c..410449a1 100644
--- a/src/main/java/org/archive/resource/html/HTMLResourceFactory.java
+++ b/src/main/java/org/archive/resource/html/HTMLResourceFactory.java
@@ -4,6 +4,7 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
+import java.nio.charset.StandardCharsets;
import java.util.logging.Logger;
import org.archive.format.http.HttpHeaders;
@@ -40,7 +41,7 @@ public Resource getResource(InputStream is, MetaData parentMetaData,
CDATALexer lex = new CDATALexer();
// guess charset based on HTTP header and sniffed content chunk
- String charset = "UTF-8";
+ String charset = StandardCharsets.UTF_8.name();
is = new BufferedInputStream(is, CHARSET_GUESS_CHUNK_SIZE);
byte[] chunk = new byte[CHARSET_GUESS_CHUNK_SIZE];
is.mark(0);
diff --git a/src/main/java/org/archive/resource/warc/WARCResource.java b/src/main/java/org/archive/resource/warc/WARCResource.java
index a9c3fcc3..a5e5ac35 100644
--- a/src/main/java/org/archive/resource/warc/WARCResource.java
+++ b/src/main/java/org/archive/resource/warc/WARCResource.java
@@ -5,6 +5,7 @@
import java.security.DigestInputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
+import java.util.Locale;
import org.archive.format.http.HttpHeader;
import org.archive.format.http.HttpResponse;
@@ -43,7 +44,7 @@ public WARCResource(MetaData metaData, ResourceContainer container,
String name = h.getName();
String value = h.getValue();
fields.putString(name,value);
- if(name.toLowerCase().equals("content-length")) {
+ if(name.toLowerCase(Locale.ROOT).equals("content-length")) {
// TODO: catch formatexception
length = Long.parseLong(value);
}
diff --git a/src/main/java/org/archive/resource/warc/record/WARCJSONMetaDataResourceFactory.java b/src/main/java/org/archive/resource/warc/record/WARCJSONMetaDataResourceFactory.java
index 43041efb..8cc8c146 100644
--- a/src/main/java/org/archive/resource/warc/record/WARCJSONMetaDataResourceFactory.java
+++ b/src/main/java/org/archive/resource/warc/record/WARCJSONMetaDataResourceFactory.java
@@ -3,7 +3,6 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
-import java.nio.charset.Charset;
import org.archive.resource.MetaData;
import org.archive.resource.Resource;
@@ -14,9 +13,9 @@
import org.json.JSONException;
import org.json.JSONTokener;
-public class WARCJSONMetaDataResourceFactory implements ResourceFactory, ResourceConstants {
- private static final Charset UTF8 = Charset.forName("UTF-8");
+import static java.nio.charset.StandardCharsets.UTF_8;
+public class WARCJSONMetaDataResourceFactory implements ResourceFactory, ResourceConstants {
public WARCJSONMetaDataResourceFactory() {
}
@@ -27,7 +26,7 @@ public Resource getResource(InputStream is, MetaData parentMetaData,
MetaData md;
try {
- md = new MetaData(new JSONTokener(new InputStreamReader(is, UTF8)));
+ md = new MetaData(new JSONTokener(new InputStreamReader(is, UTF_8)));
} catch (JSONException e) {
throw new ResourceParseException(e);
}
diff --git a/src/main/java/org/archive/streamcontext/HTTP11Stream.java b/src/main/java/org/archive/streamcontext/HTTP11Stream.java
index 06f51409..995dc53e 100755
--- a/src/main/java/org/archive/streamcontext/HTTP11Stream.java
+++ b/src/main/java/org/archive/streamcontext/HTTP11Stream.java
@@ -5,6 +5,7 @@
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
+import java.util.Locale;
public class HTTP11Stream extends AbstractBufferingStream {
private URL url;
@@ -42,7 +43,7 @@ public int doRead(byte[] b, int off, int len) throws IOException {
public void doSeek(long offset) throws IOException {
doClose();
conn = url.openConnection();
- conn.setRequestProperty("Range", String.format("bytes=%d-", offset));
+ conn.setRequestProperty("Range", String.format(Locale.ROOT, "bytes=%d-", offset));
conn.connect();
is = conn.getInputStream();
}
diff --git a/src/main/java/org/archive/uid/RecordIDGenerator.java b/src/main/java/org/archive/uid/RecordIDGenerator.java
index 4f16c5ab..80cc5565 100644
--- a/src/main/java/org/archive/uid/RecordIDGenerator.java
+++ b/src/main/java/org/archive/uid/RecordIDGenerator.java
@@ -19,7 +19,6 @@
package org.archive.uid;
import java.net.URI;
-import java.net.URISyntaxException;
import java.util.Map;
/**
diff --git a/src/main/java/org/archive/url/BasicURLCanonicalizer.java b/src/main/java/org/archive/url/BasicURLCanonicalizer.java
index 37b448c1..dd0d9ac7 100644
--- a/src/main/java/org/archive/url/BasicURLCanonicalizer.java
+++ b/src/main/java/org/archive/url/BasicURLCanonicalizer.java
@@ -6,7 +6,9 @@
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
+import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
+import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -64,7 +66,7 @@ public void canonicalize(HandyURL url) {
if (ip != null) {
host = ip;
} else if (host != null) {
- host = escapeOnce(host.toLowerCase());
+ host = escapeOnce(host.toLowerCase(Locale.ROOT));
}
url.setHost(host);
// now the path:
@@ -159,7 +161,7 @@ public String attemptIPFormats(String host) { // throws URIException {
}
ip[i] = octet;
}
- return String.format("%d.%d.%d.%d", ip[0], ip[1], ip[2], ip[3]);
+ return String.format(Locale.ROOT, "%d.%d.%d.%d", ip[0], ip[1], ip[2], ip[3]);
} else {
Matcher m2 = DECIMAL_IP.matcher(host);
if (m2.matches()) {
@@ -190,7 +192,7 @@ public String attemptIPFormats(String host) { // throws URIException {
}
ip[i] = octet;
}
- return String.format("%d.%d.%d.%d", ip[0], ip[1], ip[2],
+ return String.format(Locale.ROOT, "%d.%d.%d.%d", ip[0], ip[1], ip[2],
ip[3]);
}
@@ -203,12 +205,9 @@ public String minimalEscape(String input) {
return escapeOnce(unescapeRepeatedly(input));
}
- protected static Charset _UTF8 = null;
+ protected static Charset _UTF8 = StandardCharsets.UTF_8;
protected static Charset UTF8() {
- if (_UTF8 == null) {
- _UTF8 = Charset.forName("UTF-8");
- }
return _UTF8;
}
@@ -261,7 +260,7 @@ public String escapeOnce(String input) {
}
sb.append("%");
- String hex = Integer.toHexString(b).toUpperCase();
+ String hex = Integer.toHexString(b).toUpperCase(Locale.ROOT);
if (hex.length() == 1) {
sb.append('0');
}
diff --git a/src/main/java/org/archive/url/HandyURL.java b/src/main/java/org/archive/url/HandyURL.java
index 91539b3f..0c2c81f7 100644
--- a/src/main/java/org/archive/url/HandyURL.java
+++ b/src/main/java/org/archive/url/HandyURL.java
@@ -2,6 +2,7 @@
import java.net.MalformedURLException;
import java.net.URL;
+import java.util.Locale;
public class HandyURL {
public final static int DEFAULT_PORT = -1;
@@ -277,7 +278,7 @@ public void setOpaque(String opaque) {
}
public String toDebugString() {
- return String.format("Scheme(%s) UserName(%s) UserPass(%s) Host(%s) port(%d) Path(%s) Query(%s) Frag(%s)",
+ return String.format(Locale.ROOT, "Scheme(%s) UserName(%s) UserPass(%s) Host(%s) port(%d) Path(%s) Query(%s) Frag(%s)",
scheme, authUser, authPass, host, port, path, query, hash);
}
diff --git a/src/main/java/org/archive/url/IAURLCanonicalizer.java b/src/main/java/org/archive/url/IAURLCanonicalizer.java
index 0cf7c8a4..e964cd00 100644
--- a/src/main/java/org/archive/url/IAURLCanonicalizer.java
+++ b/src/main/java/org/archive/url/IAURLCanonicalizer.java
@@ -2,6 +2,7 @@
import java.util.Arrays;
import java.util.Comparator;
+import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -20,11 +21,11 @@ public void canonicalize(HandyURL url) {
}
if (rules.isSet(SCHEME_SETTINGS, SCHEME_LOWERCASE)) {
if (url.getScheme() != null) {
- url.setScheme(url.getScheme().toLowerCase());
+ url.setScheme(url.getScheme().toLowerCase(Locale.ROOT));
}
}
if(rules.isSet(HOST_SETTINGS, HOST_LOWERCASE)) {
- url.setHost(url.getHost().toLowerCase());
+ url.setHost(url.getHost().toLowerCase(Locale.ROOT));
}
if(rules.isSet(HOST_SETTINGS, HOST_MASSAGE)) {
url.setHost(massageHost(url.getHost()));
@@ -46,7 +47,7 @@ public void canonicalize(HandyURL url) {
url.setPath(null);
} else {
if(rules.isSet(PATH_SETTINGS, PATH_LOWERCASE)) {
- path = path.toLowerCase();
+ path = path.toLowerCase(Locale.ROOT);
}
if(rules.isSet(PATH_SETTINGS, PATH_STRIP_SESSION_ID)) {
path = URLRegexTransformer.stripPathSessionID(path);
@@ -71,7 +72,7 @@ public void canonicalize(HandyURL url) {
}
// lower-case:
if(rules.isSet(QUERY_SETTINGS, QUERY_LOWERCASE)) {
- query = query.toLowerCase();
+ query = query.toLowerCase(Locale.ROOT);
}
// re-order?
if(rules.isSet(QUERY_SETTINGS, QUERY_ALPHA_REORDER)) {
@@ -155,7 +156,7 @@ public static String massageHost(String host) {
return host;
}
public static int getDefaultPort(String scheme) {
- String lcScheme = scheme.toLowerCase();
+ String lcScheme = scheme.toLowerCase(Locale.ROOT);
if(lcScheme.equals("http")) {
return 80;
} else if(lcScheme.equals("https")) {
diff --git a/src/main/java/org/archive/url/LaxURI.java b/src/main/java/org/archive/url/LaxURI.java
index 57071460..9b7485c7 100644
--- a/src/main/java/org/archive/url/LaxURI.java
+++ b/src/main/java/org/archive/url/LaxURI.java
@@ -18,10 +18,12 @@
*/
package org.archive.url;
-import java.io.UnsupportedEncodingException;
+import java.nio.charset.Charset;
+import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.BitSet;
+import java.util.Locale;
/**
* URI subclass which allows partial/inconsistent encoding, matching
@@ -121,9 +123,10 @@ protected static String decode(String component, String charset)
byte[] rawdata = null;
rawdata = LaxURLCodec.decodeUrlLoose(component.getBytes(StandardCharsets.US_ASCII));
try {
- return new String(rawdata, charset);
- } catch (UnsupportedEncodingException e) {
- return new String(rawdata);
+ Charset cs = Charset.forName(charset);
+ return new String(rawdata, cs);
+ } catch (IllegalCharsetNameException | java.nio.charset.UnsupportedCharsetException e) { // forName() also rejects legal-but-unavailable names; keep the old fallback for them
+ return new String(rawdata, StandardCharsets.US_ASCII);
}
}
@@ -321,7 +324,7 @@ protected void parseUriReference(String original, boolean escaped)
*
*/
if (at > 0 && at < length && tmp.charAt(at) == ':') {
- char[] target = tmp.substring(0, at).toLowerCase().toCharArray();
+ char[] target = tmp.substring(0, at).toLowerCase(Locale.ROOT).toCharArray();
if (validate(target, scheme)) {
_scheme = target;
from = ++at;
diff --git a/src/main/java/org/archive/url/LaxURLCodec.java b/src/main/java/org/archive/url/LaxURLCodec.java
index e27d9de0..b68a0c19 100644
--- a/src/main/java/org/archive/url/LaxURLCodec.java
+++ b/src/main/java/org/archive/url/LaxURLCodec.java
@@ -20,17 +20,16 @@
import java.io.ByteArrayOutputStream;
import java.io.UnsupportedEncodingException;
+import java.nio.charset.StandardCharsets;
import java.util.BitSet;
import org.apache.commons.codec.net.URLCodec;
-import com.google.common.base.Charsets;
-
/**
* @author gojomo
*/
public class LaxURLCodec extends URLCodec {
- public static LaxURLCodec DEFAULT = new LaxURLCodec("UTF-8");
+ public static LaxURLCodec DEFAULT = new LaxURLCodec(StandardCharsets.UTF_8.name());
// passthrough constructor
public LaxURLCodec(String encoding) {
@@ -155,6 +154,6 @@ public String encode(BitSet safe, String pString, String cs)
if (pString == null) {
return null;
}
- return new String(encodeUrl(safe,pString.getBytes(cs)), Charsets.US_ASCII);
+ return new String(encodeUrl(safe,pString.getBytes(cs)), StandardCharsets.US_ASCII);
}
}
diff --git a/src/main/java/org/archive/url/SURT.java b/src/main/java/org/archive/url/SURT.java
index 3e0bcd55..9598f458 100644
--- a/src/main/java/org/archive/url/SURT.java
+++ b/src/main/java/org/archive/url/SURT.java
@@ -2,7 +2,7 @@
import java.io.BufferedReader;
import java.io.InputStreamReader;
-import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
import java.util.Iterator;
import java.util.logging.Logger;
@@ -33,7 +33,7 @@ public static String toSURT(String input) {
}
public static void main(String[] args) {
String line;
- InputStreamReader isr = new InputStreamReader(System.in,Charset.forName("UTF-8"));
+ InputStreamReader isr = new InputStreamReader(System.in, StandardCharsets.UTF_8);
BufferedReader br = new BufferedReader(isr);
Iterator i = AbstractPeekableIterator.wrapReader(br);
while(i.hasNext()) {
diff --git a/src/main/java/org/archive/url/URI.java b/src/main/java/org/archive/url/URI.java
index 374e0574..492f7772 100644
--- a/src/main/java/org/archive/url/URI.java
+++ b/src/main/java/org/archive/url/URI.java
@@ -34,12 +34,16 @@
import org.apache.commons.codec.net.URLCodec;
import java.io.*;
+import java.nio.charset.Charset;
+import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Hashtable;
import java.util.Locale;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
/**
* The interface for the URI(Uniform Resource Identifiers) version of RFC 2396.
* This class has the purpose of supportting of parsing a URI reference to
@@ -261,7 +265,7 @@ public URI(String scheme, String schemeSpecificPart, String fragment)
if (scheme == null) {
throw new URIException(URIException.PARSING, "scheme required");
}
- char[] s = scheme.toLowerCase().toCharArray();
+ char[] s = scheme.toLowerCase(Locale.ROOT).toCharArray();
if (validate(s, URI.scheme)) {
_scheme = s; // is_absoluteURI
} else {
@@ -622,7 +626,7 @@ public URI(URI base, URI relative) throws URIException {
/**
* The default charset of the protocol. RFC 2277, 2396
*/
- protected static String defaultProtocolCharset = "UTF-8";
+ protected static String defaultProtocolCharset = UTF_8.name();
/**
@@ -1694,7 +1698,7 @@ private static byte[] getBytes(String original, String charset) {
try {
return original.getBytes(charset);
} catch (UnsupportedEncodingException e) {
- return original.getBytes();
+ return original.getBytes(UTF_8);
}
}
@@ -1780,11 +1784,13 @@ protected static String decode(String component, String charset)
throw new URIException(e.getMessage());
}
try {
- return new String(rawdata, charset);
- } catch (UnsupportedEncodingException e) {
- return new String(rawdata);
+ Charset cs = Charset.forName(charset);
+ return new String(rawdata, cs);
+ } catch (IllegalCharsetNameException | java.nio.charset.UnsupportedCharsetException e) { // forName() also rejects legal-but-unavailable names; keep the old fallback for them
+ return new String(rawdata, StandardCharsets.US_ASCII);
}
}
+
/**
* Pre-validate the unescaped URI string within a specific component.
*
@@ -1954,7 +1960,7 @@ protected void parseUriReference(String original, boolean escaped)
*
*/
if (at > 0 && at < length && tmp.charAt(at) == ':') {
- char[] target = tmp.substring(0, at).toLowerCase().toCharArray();
+ char[] target = tmp.substring(0, at).toLowerCase(Locale.ROOT).toCharArray();
if (validate(target, scheme)) {
_scheme = target;
} else {
diff --git a/src/main/java/org/archive/url/URLRegexTransformer.java b/src/main/java/org/archive/url/URLRegexTransformer.java
index 5f31c81c..182eb218 100644
--- a/src/main/java/org/archive/url/URLRegexTransformer.java
+++ b/src/main/java/org/archive/url/URLRegexTransformer.java
@@ -1,5 +1,6 @@
package org.archive.url;
+import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -27,7 +28,7 @@ public class URLRegexTransformer {
public static String stripOpts(String orig, OptimizedPattern op[]) {
- String origLC = orig.toLowerCase();
+ String origLC = orig.toLowerCase(Locale.ROOT);
StringBuilder sb = null;
int i = 0;
int max = op.length;
diff --git a/src/main/java/org/archive/url/UsableURIFactory.java b/src/main/java/org/archive/url/UsableURIFactory.java
index 08f18999..3038ada5 100644
--- a/src/main/java/org/archive/url/UsableURIFactory.java
+++ b/src/main/java/org/archive/url/UsableURIFactory.java
@@ -23,6 +23,7 @@
import java.io.UnsupportedEncodingException;
import java.util.BitSet;
+import java.util.Locale;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
@@ -609,7 +610,7 @@ private String fixupDomainlabel(String label)
throw ue;
}
}
- label = label.toLowerCase();
+ label = label.toLowerCase(Locale.ROOT);
return label;
}
@@ -755,6 +756,6 @@ private String checkUriElement(String element) {
*/
private String checkUriElementAndLowerCase(String element) {
String tmp = checkUriElement(element);
- return (tmp != null)? tmp.toLowerCase(): tmp;
+ return (tmp != null)? tmp.toLowerCase(Locale.ROOT): tmp;
}
}
diff --git a/src/main/java/org/archive/util/ArchiveUtils.java b/src/main/java/org/archive/util/ArchiveUtils.java
index 22ba2787..cce411df 100644
--- a/src/main/java/org/archive/util/ArchiveUtils.java
+++ b/src/main/java/org/archive/util/ArchiveUtils.java
@@ -49,6 +49,8 @@
import org.archive.format.gzip.GZIPDecoder;
import org.archive.format.gzip.GZIPFormatException;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
/**
* Miscellaneous useful methods.
*
@@ -851,7 +853,7 @@ private static String loadVersion() {
BufferedReader br = null;
String version;
try {
- br = new BufferedReader(new InputStreamReader(input));
+ br = new BufferedReader(new InputStreamReader(input, UTF_8));
version = br.readLine();
br.readLine();
} catch (IOException e) {
@@ -873,7 +875,7 @@ private static String loadVersion() {
br = null;
String timestamp;
try {
- br = new BufferedReader(new InputStreamReader(input));
+ br = new BufferedReader(new InputStreamReader(input, UTF_8));
timestamp = br.readLine();
} catch (IOException e) {
return version;
@@ -894,13 +896,13 @@ private static String loadVersion() {
TLDS = new HashSet();
InputStream is = ArchiveUtils.class.getResourceAsStream("tlds-alpha-by-domain.txt");
try {
- BufferedReader reader = new BufferedReader(new InputStreamReader(is));
+ BufferedReader reader = new BufferedReader(new InputStreamReader(is, UTF_8));
String line;
while((line = reader.readLine())!=null) {
if (line.startsWith("#")) {
continue;
}
- TLDS.add(line.trim().toLowerCase());
+ TLDS.add(line.trim().toLowerCase(Locale.ROOT));
}
} catch (Exception e) {
LOGGER.log(Level.SEVERE,"TLD list unavailable",e);
@@ -917,7 +919,7 @@ private static String loadVersion() {
* @return boolean true if recognized as TLD
*/
public static boolean isTld(String dom) {
- return TLDS.contains(dom.toLowerCase());
+ return TLDS.contains(dom.toLowerCase(Locale.ROOT));
}
public static void closeQuietly(Object input) {
@@ -981,12 +983,12 @@ public static int readFully(InputStream input, byte[] buf)
*/
public static BufferedReader getBufferedReader(File source) throws IOException {
InputStream is = new BufferedInputStream(new FileInputStream(source));
- boolean isGzipped = source.getName().toLowerCase().
+ boolean isGzipped = source.getName().toLowerCase(Locale.ROOT).
endsWith(GZIP_SUFFIX);
if(isGzipped) {
is = new GZIPInputStream(is);
}
- return new BufferedReader(new InputStreamReader(is));
+ return new BufferedReader(new InputStreamReader(is, UTF_8));
}
/**
@@ -1002,8 +1004,8 @@ public static BufferedReader getBufferedReader(URL source) throws IOException {
|| conn.getContentEncoding() != null && conn.getContentEncoding().equalsIgnoreCase("gzip");
InputStream uis = conn.getInputStream();
return new BufferedReader(isGzipped?
- new InputStreamReader(new GZIPInputStream(uis)):
- new InputStreamReader(uis));
+ new InputStreamReader(new GZIPInputStream(uis), UTF_8):
+ new InputStreamReader(uis, UTF_8));
}
/**
diff --git a/src/main/java/org/archive/util/ChunkedInputStream.java b/src/main/java/org/archive/util/ChunkedInputStream.java
index 69b23047..b6a604c8 100644
--- a/src/main/java/org/archive/util/ChunkedInputStream.java
+++ b/src/main/java/org/archive/util/ChunkedInputStream.java
@@ -280,8 +280,7 @@ private static int getChunkSizeFromInputStream(final InputStream in)
* @throws IOException If an IO problem occurs
*/
private void parseTrailerHeaders() throws IOException {
- String charset = "US-ASCII";
- LaxHttpParser.parseHeaders(in, charset);
+ LaxHttpParser.parseHeaders(in, StandardCharsets.US_ASCII.name());
}
/**
diff --git a/src/main/java/org/archive/util/DevUtils.java b/src/main/java/org/archive/util/DevUtils.java
index f2a1d044..7ee4b13a 100644
--- a/src/main/java/org/archive/util/DevUtils.java
+++ b/src/main/java/org/archive/util/DevUtils.java
@@ -25,6 +25,7 @@
import java.io.StringWriter;
import java.util.logging.Logger;
+import static java.nio.charset.StandardCharsets.UTF_8;
/**
* Write a message and stack trace to the 'org.archive.util.DevUtils' logger.
@@ -92,7 +93,7 @@ public static void sigquitSelf() {
Process p = Runtime.getRuntime().exec(
new String[] {"perl", "-e", "print getppid(). \"\n\";"});
BufferedReader br =
- new BufferedReader(new InputStreamReader(p.getInputStream()));
+ new BufferedReader(new InputStreamReader(p.getInputStream(), UTF_8));
String ppid = br.readLine();
Runtime.getRuntime().exec(
new String[] {"sh", "-c", "kill -3 "+ppid}).waitFor();
diff --git a/src/main/java/org/archive/util/FileNameSpec.java b/src/main/java/org/archive/util/FileNameSpec.java
index a3312cfc..7ace8b59 100644
--- a/src/main/java/org/archive/util/FileNameSpec.java
+++ b/src/main/java/org/archive/util/FileNameSpec.java
@@ -1,5 +1,6 @@
package org.archive.util;
+import java.util.Locale;
import java.util.concurrent.atomic.AtomicInteger;
public class FileNameSpec {
@@ -15,7 +16,7 @@ public FileNameSpec(String prefix, String suffix) {
public String getNextName() {
StringBuilder sb = new StringBuilder();
sb.append(prefix);
- sb.append(String.format("%06d",aInt.incrementAndGet()));
+ sb.append(String.format(Locale.ROOT, "%06d",aInt.incrementAndGet()));
sb.append(suffix);
return sb.toString();
}
diff --git a/src/main/java/org/archive/util/FileUtils.java b/src/main/java/org/archive/util/FileUtils.java
index 70b5ffae..271d0212 100644
--- a/src/main/java/org/archive/util/FileUtils.java
+++ b/src/main/java/org/archive/util/FileUtils.java
@@ -32,6 +32,7 @@
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
+import java.util.Locale;
import java.util.Properties;
import java.util.logging.Level;
import java.util.logging.Logger;
@@ -219,8 +220,8 @@ protected static void workaroundCopyFile(final File src,
FileFilter prefixFilter = new FileFilter() {
public boolean accept(File pathname)
{
- return pathname.getName().toLowerCase().
- startsWith(prefix.toLowerCase());
+ return pathname.getName().toLowerCase(Locale.ROOT).
+ startsWith(prefix.toLowerCase(Locale.ROOT));
}
};
return dir.listFiles(prefixFilter);
@@ -283,7 +284,7 @@ public static boolean isReadableWithExtensionAndMagic(final File f,
throws IOException {
boolean result = false;
FileUtils.assertReadable(f);
- if(f.getName().toLowerCase().endsWith(uncompressedExtension)) {
+ if(f.getName().toLowerCase(Locale.ROOT).endsWith(uncompressedExtension)) {
FileInputStream fis = new FileInputStream(f);
try {
byte [] b = new byte[magic.length()];
@@ -392,7 +393,6 @@ public static boolean moveAsideIfExists(File file) throws IOException {
* after the end of the last line returned
* @throws IOException
*/
- @SuppressWarnings("unchecked")
public static LongRange pagedLines(File file, long position,
int signedDesiredLineCount, List lines, int lineEstimate)
throws IOException {
@@ -708,4 +708,4 @@ public static void appendTo(File fileToAppendTo, File fileToAppendFrom) throws I
out.flush();
}
}
-}
\ No newline at end of file
+}
diff --git a/src/main/java/org/archive/util/Grep.java b/src/main/java/org/archive/util/Grep.java
index e446e47e..892429bd 100644
--- a/src/main/java/org/archive/util/Grep.java
+++ b/src/main/java/org/archive/util/Grep.java
@@ -1,10 +1,13 @@
package org.archive.util;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
import java.io.BufferedReader;
-import java.io.FileReader;
+import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintStream;
+import java.nio.charset.Charset;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
@@ -119,14 +122,14 @@ protected void doTheGrepThing() throws Exception {
if (files != null) {
if (files.size() == 1) {
- grep(new BufferedReader(new FileReader(files.get(0))), "");
+ grep(new BufferedReader(new InputStreamReader(new FileInputStream(files.get(0)), UTF_8)), "");
} else {
for (String path : files) {
- grep(new BufferedReader(new FileReader(path)), path + ": ");
+ grep(new BufferedReader(new InputStreamReader(new FileInputStream(path), UTF_8)), path + ": ");
}
}
} else {
- grep(new BufferedReader(new InputStreamReader(System.in)), "");
+ grep(new BufferedReader(new InputStreamReader(System.in, Charset.defaultCharset())), "");
}
}
diff --git a/src/main/java/org/archive/util/HMACSigner.java b/src/main/java/org/archive/util/HMACSigner.java
index d7a5208e..b502b4fb 100644
--- a/src/main/java/org/archive/util/HMACSigner.java
+++ b/src/main/java/org/archive/util/HMACSigner.java
@@ -1,5 +1,7 @@
package org.archive.util;
+import java.nio.charset.StandardCharsets;
+
/**
* Generate an HMAC key given a secret sig, key name and optional id and an expiration time
*
@@ -63,11 +65,11 @@ public static String hmacDigest(String msg, String keyString, String algo) {
String digest = null;
try {
SecretKeySpec key = new SecretKeySpec(
- (keyString).getBytes("UTF-8"), algo);
+ (keyString).getBytes(StandardCharsets.UTF_8), algo);
Mac mac = Mac.getInstance(algo);
mac.init(key);
- byte[] bytes = mac.doFinal(msg.getBytes("ASCII"));
+ byte[] bytes = mac.doFinal(msg.getBytes(StandardCharsets.US_ASCII));
StringBuilder hash = new StringBuilder();
diff --git a/src/main/java/org/archive/util/IAUtils.java b/src/main/java/org/archive/util/IAUtils.java
index 4597d723..334a31b4 100644
--- a/src/main/java/org/archive/util/IAUtils.java
+++ b/src/main/java/org/archive/util/IAUtils.java
@@ -29,13 +29,15 @@
import java.nio.charset.Charset;
import java.util.Properties;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
/**
* Miscellaneous useful methods.
*
* @author gojomo & others
*/
public class IAUtils {
- public final static Charset UTF8 = Charset.forName("utf-8");
+ public final static Charset UTF8 = UTF_8;
final public static String COMMONS_VERSION = loadCommonsVersion();
final public static String PUBLISHER = loadCommons("publisher");
@@ -53,7 +55,7 @@ public static String loadCommonsVersion() {
BufferedReader br = null;
String version;
try {
- br = new BufferedReader(new InputStreamReader(input));
+ br = new BufferedReader(new InputStreamReader(input, UTF_8));
version = br.readLine();
br.readLine();
} catch (IOException e) {
@@ -71,11 +73,7 @@ public static String loadCommons(String id) {
if (input == null) {
return "UNKNOWN";
}
- try {
- reader = new InputStreamReader(input, "UTF-8");
- } catch (UnsupportedEncodingException e) {
- return "UNKNOWN";
- }
+ reader = new InputStreamReader(input, UTF_8);
Properties prop = new Properties();
try {
prop.load(reader);
diff --git a/src/main/java/org/archive/util/IterableLineIterator.java b/src/main/java/org/archive/util/IterableLineIterator.java
index 33efa1fd..c9010031 100644
--- a/src/main/java/org/archive/util/IterableLineIterator.java
+++ b/src/main/java/org/archive/util/IterableLineIterator.java
@@ -19,7 +19,6 @@ public IterableLineIterator(final Reader reader)
super(reader);
}
- @SuppressWarnings("unchecked")
public Iterator iterator() {
return this;
}
diff --git a/src/main/java/org/archive/util/LaxHttpParser.java b/src/main/java/org/archive/util/LaxHttpParser.java
index 0545fd95..434522c8 100644
--- a/src/main/java/org/archive/util/LaxHttpParser.java
+++ b/src/main/java/org/archive/util/LaxHttpParser.java
@@ -36,6 +36,7 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
+import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.logging.Logger;
@@ -127,7 +128,7 @@ public static String readLine(InputStream inputStream, String charset) throws IO
try {
return new String(rawdata, 0, len - offset, charset);
} catch (UnsupportedEncodingException e) {
- return new String(rawdata, 0, len - offset);
+ return new String(rawdata, 0, len - offset, StandardCharsets.ISO_8859_1);
}
}
@@ -147,7 +148,7 @@ public static String readLine(InputStream inputStream, String charset) throws IO
public static String readLine(InputStream inputStream) throws IOException {
LOG.finest("enter LaxHttpParser.readLine(InputStream)");
- return readLine(inputStream, "US-ASCII");
+ return readLine(inputStream, StandardCharsets.US_ASCII.name());
}
/**
@@ -237,6 +238,6 @@ public static HttpHeader[] parseHeaders(InputStream is, String charset) throws I
*/
public static HttpHeader[] parseHeaders(InputStream is) throws IOException {
LOG.finest("enter HeaderParser.parseHeaders(InputStream, String)");
- return parseHeaders(is, "US-ASCII");
+ return parseHeaders(is, StandardCharsets.US_ASCII.name());
}
}
diff --git a/src/main/java/org/archive/util/ProcessUtils.java b/src/main/java/org/archive/util/ProcessUtils.java
index af792981..0a3eeb67 100644
--- a/src/main/java/org/archive/util/ProcessUtils.java
+++ b/src/main/java/org/archive/util/ProcessUtils.java
@@ -26,6 +26,8 @@
import java.util.logging.Level;
import java.util.logging.Logger;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
/**
* Class to run an external process.
* @author stack
@@ -55,7 +57,7 @@ protected StreamGobbler(InputStream is, String name) {
public void run() {
try {
BufferedReader br =
- new BufferedReader(new InputStreamReader(this.is));
+ new BufferedReader(new InputStreamReader(this.is, UTF_8));
for (String line = null; (line = br.readLine()) != null;) {
this.sink.append(line);
}
diff --git a/src/main/java/org/archive/util/Recorder.java b/src/main/java/org/archive/util/Recorder.java
index 6a7a53d7..9f10ec92 100644
--- a/src/main/java/org/archive/util/Recorder.java
+++ b/src/main/java/org/archive/util/Recorder.java
@@ -25,7 +25,9 @@
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
import java.util.HashSet;
+import java.util.Locale;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
@@ -41,8 +43,6 @@
import org.archive.io.ReplayCharSequence;
import org.archive.io.ReplayInputStream;
-import com.google.common.base.Charsets;
-
/**
* Pairs together a RecordingInputStream and RecordingOutputStream
@@ -95,7 +95,7 @@ public class Recorder {
* (current behavior is for consistency with our prior but perhaps not
* optimal behavior)
*/
- protected Charset charset = Charsets.UTF_8;
+ protected Charset charset = StandardCharsets.UTF_8;
/** whether recording-input (ris) message-body is chunked */
protected boolean inputIsChunked = false;
@@ -338,8 +338,8 @@ public void setInputIsChunked(boolean chunked) {
* @param contentEncoding declared content-encoding of input recording.
*/
public void setContentEncoding(String contentEncoding) {
- String lowerCoding = contentEncoding.toLowerCase();
- if(!SUPPORTED_ENCODINGS.contains(contentEncoding.toLowerCase())) {
+ String lowerCoding = contentEncoding.toLowerCase(Locale.ROOT);
+ if(!SUPPORTED_ENCODINGS.contains(lowerCoding)) {
throw new IllegalArgumentException("contentEncoding unsupported: "+contentEncoding);
}
this.contentEncoding = lowerCoding;
diff --git a/src/main/java/org/archive/util/SURT.java b/src/main/java/org/archive/util/SURT.java
index 059b2ec6..99347e9f 100644
--- a/src/main/java/org/archive/util/SURT.java
+++ b/src/main/java/org/archive/util/SURT.java
@@ -27,11 +27,14 @@
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintStream;
+import java.nio.charset.Charset;
import java.util.regex.Matcher;
import org.archive.url.URIException;
import org.archive.url.UsableURIFactory;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
/**
* Sort-friendly URI Reordering Transform.
*
@@ -238,10 +241,10 @@ public static void main(String[] args) throws IOException {
InputStream in = args.length > 0 ? new BufferedInputStream(
new FileInputStream(args[0])) : System.in;
PrintStream out = args.length > 1 ? new PrintStream(
- new BufferedOutputStream(new FileOutputStream(args[1])))
+ new BufferedOutputStream(new FileOutputStream(args[1])), false, UTF_8.name())
: System.out;
BufferedReader br =
- new BufferedReader(new InputStreamReader(in));
+ new BufferedReader(new InputStreamReader(in, Charset.defaultCharset()));
String line;
while((line = br.readLine())!=null) {
if(line.indexOf("#")>0) line=line.substring(0,line.indexOf("#"));
diff --git a/src/main/java/org/archive/util/SurtPrefixSet.java b/src/main/java/org/archive/util/SurtPrefixSet.java
index 6925cc83..b2f0ea4f 100644
--- a/src/main/java/org/archive/util/SurtPrefixSet.java
+++ b/src/main/java/org/archive/util/SurtPrefixSet.java
@@ -31,11 +31,14 @@
import java.io.PrintStream;
import java.io.Reader;
import java.util.Iterator;
+import java.util.Locale;
import org.archive.url.UsableURI;
import org.archive.util.iterator.LineReadingIterator;
import org.archive.util.iterator.RegexLineIterator;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
/**
* Specialized TreeSet for keeping a set of String prefixes.
*
@@ -70,7 +73,7 @@ public void importFrom(Reader r) {
while (iter.hasNext()) {
s = (String) iter.next();
- add(s.toLowerCase());
+ add(s.toLowerCase(Locale.ROOT));
}
}
@@ -145,7 +148,7 @@ public boolean considerAsAddDirective(String suri) {
}
if(u.indexOf("(")>0) {
// formal SURT prefix; toLowerCase just in case
- add(u.toLowerCase());
+ add(u.toLowerCase(Locale.ROOT));
} else {
// hostname/normal form URI from which
// to deduce SURT prefix
@@ -342,10 +345,10 @@ public static void main(String[] args) throws IOException {
InputStream in = args.length > 0 ? new BufferedInputStream(
new FileInputStream(args[0])) : System.in;
PrintStream out = args.length > 1 ? new PrintStream(
- new BufferedOutputStream(new FileOutputStream(args[1])))
+ new BufferedOutputStream(new FileOutputStream(args[1])), false, UTF_8.name())
: System.out;
BufferedReader br =
- new BufferedReader(new InputStreamReader(in));
+ new BufferedReader(new InputStreamReader(in, UTF_8));
String line;
while((line = br.readLine())!=null) {
if(line.indexOf("#")>0) line=line.substring(0,line.indexOf("#"));
diff --git a/src/main/java/org/archive/util/TextUtils.java b/src/main/java/org/archive/util/TextUtils.java
index 98b471f8..627d411a 100644
--- a/src/main/java/org/archive/util/TextUtils.java
+++ b/src/main/java/org/archive/util/TextUtils.java
@@ -30,7 +30,6 @@
import java.net.URLEncoder;
import java.util.HashMap;
import java.util.Map;
-import java.util.concurrent.ConcurrentMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -40,6 +39,8 @@
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
public class TextUtils {
private static final String FIRSTWORD = "^([^\\s]*).*$";
@@ -279,14 +280,11 @@ public static String exceptionToString(String message, Throwable e) {
* @param s String to escape
* @return URL-escaped string
*/
- @SuppressWarnings("deprecation")
public static String urlEscape(String s) {
try {
- return URLEncoder.encode(s,"UTF8");
+ return URLEncoder.encode(s, UTF_8.name());
} catch (UnsupportedEncodingException e) {
- // should be impossible; all JVMs must support UTF8
- // but have a fallback just in case
- return URLEncoder.encode(s);
+ throw new IllegalStateException("UTF-8 charset unavailable", e);
}
}
@@ -296,14 +294,11 @@ public static String urlEscape(String s) {
* @param s String do unescape
* @return URL-unescaped String
*/
- @SuppressWarnings("deprecation")
public static String urlUnescape(String s) {
try {
- return URLDecoder.decode(s, "UTF8");
+ return URLDecoder.decode(s, UTF_8.name());
} catch (UnsupportedEncodingException e) {
- // should be impossible; all JVMs must support UTF8
- // but have a fallback just in case
- return URLDecoder.decode(s);
+ throw new IllegalStateException("UTF-8 charset unavailable", e);
}
}
}
\ No newline at end of file
diff --git a/src/main/java/org/archive/util/binsearch/AbstractSeekableLineReader.java b/src/main/java/org/archive/util/binsearch/AbstractSeekableLineReader.java
index de57278e..17d411fa 100644
--- a/src/main/java/org/archive/util/binsearch/AbstractSeekableLineReader.java
+++ b/src/main/java/org/archive/util/binsearch/AbstractSeekableLineReader.java
@@ -7,13 +7,14 @@
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
import org.archive.util.zip.GZIPMembersInputStream;
import com.google.common.io.ByteStreams;
public abstract class AbstractSeekableLineReader implements SeekableLineReader {
- public final static Charset UTF8 = Charset.forName("UTF-8");
+ public final static Charset UTF8 = StandardCharsets.UTF_8;
protected int blockSize = 128 * 1024;
diff --git a/src/main/java/org/archive/util/binsearch/SeekCDXBenchmarker.java b/src/main/java/org/archive/util/binsearch/SeekCDXBenchmarker.java
index 76b7b2b9..45c2ee04 100644
--- a/src/main/java/org/archive/util/binsearch/SeekCDXBenchmarker.java
+++ b/src/main/java/org/archive/util/binsearch/SeekCDXBenchmarker.java
@@ -3,6 +3,7 @@
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
+import java.nio.charset.Charset;
import org.archive.url.WaybackURLKeyMaker;
import org.archive.util.binsearch.impl.MappedSeekableLineReaderFactory;
@@ -52,7 +53,7 @@ public static void main(String[] args) throws IOException {
SortedTextFile sorted = new SortedTextFile(factory);
sorted.setBinsearchBlockSize(blocksize);
- BufferedReader reader = new BufferedReader(new InputStreamReader(System.in));
+ BufferedReader reader = new BufferedReader(new InputStreamReader(System.in, Charset.defaultCharset()));
WaybackURLKeyMaker keymaker = new WaybackURLKeyMaker(true);
diff --git a/src/main/java/org/archive/util/binsearch/SortedTextFile.java b/src/main/java/org/archive/util/binsearch/SortedTextFile.java
index ab8118b7..bb4a1f66 100644
--- a/src/main/java/org/archive/util/binsearch/SortedTextFile.java
+++ b/src/main/java/org/archive/util/binsearch/SortedTextFile.java
@@ -2,12 +2,15 @@
import java.io.IOException;
import java.util.Comparator;
+import java.util.Locale;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.archive.util.GeneralURIStreamFactory;
import org.archive.util.iterator.CloseableIterator;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
public class SortedTextFile {
public static class NumericComparator implements Comparator
@@ -142,14 +145,14 @@ public long binaryFindOffset(SeekableLineReader slr, final String key, Comparato
if (comparator.compare(key, line) > 0) {
if(LOGGER.isLoggable(Level.FINE)) {
- LOGGER.fine(String.format("Search(%d) (%s)/(%s) : After",
+ LOGGER.fine(String.format(Locale.ROOT, "Search(%d) (%s)/(%s) : After",
mid * blockSize, key,line));
}
min = mid;
} else {
if(LOGGER.isLoggable(Level.FINE)) {
- LOGGER.fine(String.format("Search(%d) (%s)/(%s) : Before",
+ LOGGER.fine(String.format(Locale.ROOT, "Search(%d) (%s)/(%s) : Before",
mid * blockSize, key,line));
}
max = mid;
@@ -370,7 +373,7 @@ private long searchOffset(SeekableLineReader slr,
String prev = null;
while(true) {
if (line != null) {
- offset += line.getBytes().length + 1;
+ offset += line.getBytes(UTF_8).length + 1;
}
line = slr.readLine();
if(line == null) break;
@@ -379,7 +382,7 @@ private long searchOffset(SeekableLineReader slr,
}
if (lessThan && prev != null) {
- offset -= prev.getBytes().length + 1;
+ offset -= prev.getBytes(UTF_8).length + 1;
}
return offset;
@@ -391,7 +394,7 @@ private CloseableIterator search(SeekableLineReader slr,
long min = binaryFindOffset(slr, key, comparator);
if (LOGGER.isLoggable(Level.FINE)) {
- LOGGER.fine(String.format("Aligning(%d)",min));
+ LOGGER.fine(String.format(Locale.ROOT, "Aligning(%d)",min));
}
slr.seek(min);
diff --git a/src/test/java/org/archive/format/dns/DNSResponseParserTest.java b/src/test/java/org/archive/format/dns/DNSResponseParserTest.java
index 7ade0ad5..73e1fda8 100644
--- a/src/test/java/org/archive/format/dns/DNSResponseParserTest.java
+++ b/src/test/java/org/archive/format/dns/DNSResponseParserTest.java
@@ -5,6 +5,8 @@
import org.junit.jupiter.api.Test;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
import static org.junit.jupiter.api.Assertions.assertEquals;
public class DNSResponseParserTest {
@@ -20,7 +22,7 @@ public void testParse() throws DNSParseException, IOException {
}
private void verifyResults(String res, String date, String d[][]) throws DNSParseException, IOException {
ByteArrayInputStream is =
- new ByteArrayInputStream(res.getBytes("UTF-8"));
+ new ByteArrayInputStream(res.getBytes(UTF_8));
DNSResponse response = new DNSResponse();
parser.parse(is, response);
verifyResults(response,date,d);
diff --git a/src/test/java/org/archive/format/gzip/zipnum/ZipNumWriterTest.java b/src/test/java/org/archive/format/gzip/zipnum/ZipNumWriterTest.java
index 25a5eaa7..13658bcb 100644
--- a/src/test/java/org/archive/format/gzip/zipnum/ZipNumWriterTest.java
+++ b/src/test/java/org/archive/format/gzip/zipnum/ZipNumWriterTest.java
@@ -10,7 +10,7 @@
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
-import java.nio.charset.StandardCharsets;
+import java.util.Locale;
import org.archive.format.gzip.GZIPMemberSeries;
import org.archive.format.gzip.GZIPSeriesMember;
@@ -18,6 +18,8 @@
import org.junit.jupiter.api.Test;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
import static org.junit.jupiter.api.Assertions.assertEquals;
public class ZipNumWriterTest {
@@ -28,16 +30,16 @@ public void testAddRecord() throws IOException {
File summ = File.createTempFile("test-znw",".summ");
main.deleteOnExit();
summ.deleteOnExit();
- System.out.format("Summ: %s\n", summ.getAbsolutePath());
+ System.out.format(Locale.ROOT, "Summ: %s\n", summ.getAbsolutePath());
int limit = 10;
ZipNumWriter znw = new ZipNumWriter(new FileOutputStream(main,false),
new FileOutputStream(summ,false), limit);
for(int i = 0; i < 1000; i++) {
- znw.addRecord(String.format("%06d\n",i).getBytes(StandardCharsets.UTF_8));
+ znw.addRecord(String.format(Locale.ROOT,"%06d\n",i).getBytes(UTF_8));
}
znw.close();
InputStreamReader isr =
- new InputStreamReader(new FileInputStream(summ), StandardCharsets.UTF_8);
+ new InputStreamReader(new FileInputStream(summ), UTF_8);
BufferedReader br = new BufferedReader(isr);
String line = null;
int count = 0;
diff --git a/src/test/java/org/archive/format/json/JSONViewTest.java b/src/test/java/org/archive/format/json/JSONViewTest.java
index aabbe7df..6d199025 100644
--- a/src/test/java/org/archive/format/json/JSONViewTest.java
+++ b/src/test/java/org/archive/format/json/JSONViewTest.java
@@ -1,5 +1,7 @@
package org.archive.format.json;
+import java.util.Locale;
+
import org.archive.util.TestUtils;
import org.json.JSONException;
import org.json.JSONObject;
@@ -17,16 +19,16 @@ public void testBytes() throws JSONException {
JSONObject o = new JSONObject();
o.append("name1", "val\\rue1");
String json = o.toString();
- System.out.format("once: (%s)\n",json);
+ System.out.format(Locale.ROOT, "once: (%s)\n", json);
JSONObject o2 = new JSONObject(json);
- System.out.format("twice: (%s)\n",o2.toString());
+ System.out.format(Locale.ROOT, "twice: (%s)\n", o2.toString());
byte b[] = new byte[2];
for(int i = 0; i < 256; i++) {
b[0] = (byte) i;
int gi = getInt(b);
- System.out.format("I(%d) gi(%d)\n",i,gi);
+ System.out.format(Locale.ROOT, "I(%d) gi(%d)\n", i, gi);
}
}
diff --git a/src/test/java/org/archive/format/text/html/CDATALexerTest.java b/src/test/java/org/archive/format/text/html/CDATALexerTest.java
index 856576ba..7c9f24f3 100644
--- a/src/test/java/org/archive/format/text/html/CDATALexerTest.java
+++ b/src/test/java/org/archive/format/text/html/CDATALexerTest.java
@@ -10,6 +10,8 @@
import static org.junit.jupiter.api.Assertions.*;
+import java.util.Locale;
+
public class CDATALexerTest {
CDATALexer l;
Node n;
@@ -102,7 +104,7 @@ public void testInJSComment() throws ParserException {
}
private void assertJSContentWorks(String js) throws ParserException {
- String html = String.format("",js);
+ String html = String.format(Locale.ROOT,"",js);
l = makeLexer(html);
assertFalse(l.inCSS());
assertFalse(l.inJS());
diff --git a/src/test/java/org/archive/io/HeaderedArchiveRecordTest.java b/src/test/java/org/archive/io/HeaderedArchiveRecordTest.java
index 005e2c49..5d31b890 100644
--- a/src/test/java/org/archive/io/HeaderedArchiveRecordTest.java
+++ b/src/test/java/org/archive/io/HeaderedArchiveRecordTest.java
@@ -31,6 +31,8 @@
import org.archive.io.warc.WARCRecord;
import org.junit.jupiter.api.Test;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
@@ -68,7 +70,7 @@ public void testParseHttpHeadersInWARC() throws IOException {
final String hdr = warcHeader + HTTPHEADER + BODY;
- WARCRecord r = new WARCRecord(new ByteArrayInputStream(hdr.getBytes()),
+ WARCRecord r = new WARCRecord(new ByteArrayInputStream(hdr.getBytes(UTF_8)),
"READER_IDENTIFIER", 0, false, true);
HeaderedArchiveRecord har = new HeaderedArchiveRecord(r, true);
@@ -76,7 +78,7 @@ public void testParseHttpHeadersInWARC() throws IOException {
byte[] b = new byte[BODY.length()];
har.read(b);
- String bodyRead = new String(b);
+ String bodyRead = new String(b, UTF_8);
assertEquals(BODY, bodyRead);
assertHeaderCorrectlyParsed(har.getContentHeaders());
assertEquals(har.getHeader().getUrl(), url,
@@ -156,14 +158,14 @@ public String getVersion() {
}
};
- ARCRecord r = new ARCRecord(new ByteArrayInputStream(hdr.getBytes()),
+ ARCRecord r = new ARCRecord(new ByteArrayInputStream(hdr.getBytes(UTF_8)),
arh, 0, false, true, false);
HeaderedArchiveRecord har = new HeaderedArchiveRecord(r, true);
har.skipHttpHeader();
byte[] b = new byte[BODY.length()];
har.read(b);
- String bodyRead = new String(b);
+ String bodyRead = new String(b, UTF_8);
assertEquals(BODY, bodyRead);
assertHeaderCorrectlyParsed(har.getContentHeaders());
}
@@ -175,14 +177,14 @@ public void testEasierParseHttpHeadersInARC() throws IOException {
+ " 192.168.0.1 20070515111004 text/html 167568\n";
final String hdr = arcHeader + HTTPHEADER + BODY;
- ARCRecord r = new ARCRecord(new ByteArrayInputStream(hdr.getBytes()),
+ ARCRecord r = new ARCRecord(new ByteArrayInputStream(hdr.getBytes(UTF_8)),
"READER_IDENTIFIER", 0, false, true, false);
HeaderedArchiveRecord har = new HeaderedArchiveRecord(r, true);
har.skipHttpHeader();
byte[] b = new byte[BODY.length()];
har.read(b);
- String bodyRead = new String(b);
+ String bodyRead = new String(b, UTF_8);
assertEquals(BODY, bodyRead);
assertHeaderCorrectlyParsed(har.getContentHeaders());
assertEquals(har.getHeader().getUrl(), url, "failed to retrieve Url from metadata");
@@ -205,7 +207,7 @@ public void testNoheaderWARC() throws IOException {
String c = "WARC/0.12\r\nContent-Type: text/plain\r\n"
+ "Content-Length: " + b.length() + "\r\n\r\n" + b;
org.archive.io.warc.WARCRecord r = new org.archive.io.warc.WARCRecord(
- new ByteArrayInputStream(c.getBytes()), "READER_IDENTIFIER", 0,
+ new ByteArrayInputStream(c.getBytes(UTF_8)), "READER_IDENTIFIER", 0,
false, true);
HeaderedArchiveRecord har = new HeaderedArchiveRecord(r, true);
assertTrue(har.isStrict());
diff --git a/src/test/java/org/archive/io/RecordingInputStreamTest.java b/src/test/java/org/archive/io/RecordingInputStreamTest.java
index 49160aa3..74e92024 100644
--- a/src/test/java/org/archive/io/RecordingInputStreamTest.java
+++ b/src/test/java/org/archive/io/RecordingInputStreamTest.java
@@ -28,6 +28,8 @@
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
@@ -55,7 +57,7 @@ public void testReadFullyOrUntil() throws RecorderTimeoutException, IOException,
RecordingInputStream ris = new RecordingInputStream(16384, (new File(
tempDir, "testReadFullyOrUntil").getAbsolutePath()));
ByteArrayInputStream bais = new ByteArrayInputStream(
- "abcdefghijklmnopqrstuvwxyz".getBytes());
+ "abcdefghijklmnopqrstuvwxyz".getBytes(UTF_8));
// test soft max
ris.open(bais);
ris.setLimits(10,0,0);
@@ -64,8 +66,9 @@ public void testReadFullyOrUntil() throws RecorderTimeoutException, IOException,
ReplayInputStream res = ris.getReplayInputStream();
ByteArrayOutputStream baos = new ByteArrayOutputStream();
res.readFullyTo(baos);
- assertEquals("abcdefg",new String(baos.toByteArray()),"soft max cutoff");
- // test hard max
+ assertEquals("abcdefg", new String(baos.toByteArray(), UTF_8),
+ "soft max cutoff");
+ // test hard max
bais.reset();
baos.reset();
ris.open(bais);
@@ -80,14 +83,14 @@ public void testReadFullyOrUntil() throws RecorderTimeoutException, IOException,
ris.close();
res = ris.getReplayInputStream();
res.readFullyTo(baos);
- assertEquals("abcdefghijk",new String(baos.toByteArray()),
- "hard max cutoff");
+ assertEquals("abcdefghijk", new String(baos.toByteArray(), UTF_8),
+ "hard max cutoff");
// test timeout
PipedInputStream pin = new PipedInputStream();
PipedOutputStream pout = new PipedOutputStream(pin);
ris.open(pin);
exceptionThrown = false;
- trickle("abcdefghijklmnopqrstuvwxyz".getBytes(),pout);
+ trickle("abcdefghijklmnopqrstuvwxyz".getBytes(UTF_8),pout);
int timeout = 200;
try {
ris.setLimits(0, timeout,0);
@@ -133,10 +136,10 @@ public void testAsOutputStream() throws IOException {
RecordingInputStream ris = new RecordingInputStream(16384, (new File(
tempDir, "testAsOutputStream").getAbsolutePath()));
ris.open(null);
- ris.asOutputStream().write("hello".getBytes());
+ ris.asOutputStream().write("hello".getBytes(UTF_8));
ris.close();
ByteArrayOutputStream baos = new ByteArrayOutputStream();
ris.getReplayInputStream().readFullyTo(baos);
- assertEquals("hello", baos.toString());
+ assertEquals("hello", baos.toString(UTF_8.name()));
}
}
diff --git a/src/test/java/org/archive/io/RecordingOutputStreamTest.java b/src/test/java/org/archive/io/RecordingOutputStreamTest.java
index c94f8245..0dba910e 100644
--- a/src/test/java/org/archive/io/RecordingOutputStreamTest.java
+++ b/src/test/java/org/archive/io/RecordingOutputStreamTest.java
@@ -28,6 +28,8 @@
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
@@ -266,61 +268,61 @@ public void testMessageBodyBegin() throws IOException {
ros.setSha1Digest();
ros.open(new ByteArrayOutputStream());
- ros.write("0123456789\n\nabcdefghij".getBytes());
+ ros.write("0123456789\n\nabcdefghij".getBytes(UTF_8));
assertEquals(12, ros.getMessageBodyBegin());
assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue()));
ros.close();
ros.open(new ByteArrayOutputStream());
- ros.write("0123456789\r\n\r\nabcdefghij".getBytes());
+ ros.write("0123456789\r\n\r\nabcdefghij".getBytes(UTF_8));
assertEquals(14, ros.getMessageBodyBegin());
assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue()));
ros.close();
ros.open(new ByteArrayOutputStream());
- ros.write("0123456789\n\r\nabcdefghij".getBytes());
+ ros.write("0123456789\n\r\nabcdefghij".getBytes(UTF_8));
assertEquals(13, ros.getMessageBodyBegin());
assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue()));
ros.close();
ros.open(new ByteArrayOutputStream());
- ros.write("0123456789\n".getBytes());
+ ros.write("0123456789\n".getBytes(UTF_8));
assertEquals(-1, ros.getMessageBodyBegin());
- ros.write("\nabcdefghij".getBytes());
+ ros.write("\nabcdefghij".getBytes(UTF_8));
assertEquals(12, ros.getMessageBodyBegin());
assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue()));
ros.close();
ros.open(new ByteArrayOutputStream());
- ros.write("0123456789\n".getBytes());
+ ros.write("0123456789\n".getBytes(UTF_8));
assertEquals(-1, ros.getMessageBodyBegin());
- ros.write("\r\nabcdefghij".getBytes());
+ ros.write("\r\nabcdefghij".getBytes(UTF_8));
assertEquals(13, ros.getMessageBodyBegin());
assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue()));
ros.close();
ros.open(new ByteArrayOutputStream());
- ros.write("0123456789\n\r".getBytes());
+ ros.write("0123456789\n\r".getBytes(UTF_8));
assertEquals(-1, ros.getMessageBodyBegin());
- ros.write("\nabcdefghij".getBytes());
+ ros.write("\nabcdefghij".getBytes(UTF_8));
assertEquals(13, ros.getMessageBodyBegin());
assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue()));
ros.close();
ros.open(new ByteArrayOutputStream());
- ros.write("0123456789".getBytes());
+ ros.write("0123456789".getBytes(UTF_8));
ros.write('\n');
assertEquals(-1, ros.getMessageBodyBegin());
- ros.write("\nabcdefghij".getBytes());
+ ros.write("\nabcdefghij".getBytes(UTF_8));
assertEquals(12, ros.getMessageBodyBegin());
assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue()));
ros.close();
ros.open(new ByteArrayOutputStream());
- ros.write("0123456789".getBytes());
+ ros.write("0123456789".getBytes(UTF_8));
ros.write('\n');
ros.write('\n');
- for (int b: "abcdefghij".getBytes()) {
+ for (int b: "abcdefghij".getBytes(UTF_8)) {
ros.write(b);
}
assertEquals(12, ros.getMessageBodyBegin());
@@ -328,11 +330,11 @@ public void testMessageBodyBegin() throws IOException {
ros.close();
ros.open(new ByteArrayOutputStream());
- ros.write("0123456789".getBytes());
+ ros.write("0123456789".getBytes(UTF_8));
ros.write('\n');
ros.write('\r');
ros.write('\n');
- for (int b: "abcdefghij".getBytes()) {
+ for (int b: "abcdefghij".getBytes(UTF_8)) {
ros.write(b);
}
assertEquals(13, ros.getMessageBodyBegin());
@@ -340,17 +342,17 @@ public void testMessageBodyBegin() throws IOException {
ros.close();
ros.open(new ByteArrayOutputStream());
- ros.write("0123456789\n".getBytes());
+ ros.write("0123456789\n".getBytes(UTF_8));
ros.write('\n');
- ros.write("abcdefghij".getBytes());
+ ros.write("abcdefghij".getBytes(UTF_8));
assertEquals(12, ros.getMessageBodyBegin());
assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue()));
ros.close();
ros.open(new ByteArrayOutputStream());
- ros.write("0123456789\n\r".getBytes());
+ ros.write("0123456789\n\r".getBytes(UTF_8));
ros.write('\n');
- ros.write("abcdefghij".getBytes());
+ ros.write("abcdefghij".getBytes(UTF_8));
assertEquals(13, ros.getMessageBodyBegin());
assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue()));
ros.close();
diff --git a/src/test/java/org/archive/io/ReplayCharSequenceTest.java b/src/test/java/org/archive/io/ReplayCharSequenceTest.java
index 3234259c..3935837b 100644
--- a/src/test/java/org/archive/io/ReplayCharSequenceTest.java
+++ b/src/test/java/org/archive/io/ReplayCharSequenceTest.java
@@ -25,17 +25,21 @@
import java.nio.charset.StandardCharsets;
import java.text.NumberFormat;
import java.util.Date;
+import java.util.Locale;
import java.util.Random;
import java.util.logging.Logger;
import org.archive.util.FileUtils;
-import com.google.common.base.Charsets;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
+import static java.nio.charset.StandardCharsets.US_ASCII;
+import static java.nio.charset.StandardCharsets.ISO_8859_1;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
import static org.junit.jupiter.api.Assertions.*;
/**
@@ -133,7 +137,7 @@ public void testGetReplayCharSequenceMultiByteZeroOffset()
RecordingOutputStream ros = writeTestStream(
regularBuffer,MULTIPLIER,
"testGetReplayCharSequenceMultiByteZeroOffset",MULTIPLIER);
- ReplayCharSequence rcs = getReplayCharSequence(ros,Charsets.UTF_8);
+ ReplayCharSequence rcs = getReplayCharSequence(ros, UTF_8);
for (int i = 0; i < MULTIPLIER; i++) {
accessingCharacters(rcs);
@@ -143,7 +147,7 @@ public void testGetReplayCharSequenceMultiByteZeroOffset()
@Test
public void testReplayCharSequenceByteToString() throws IOException {
String fileContent = "Some file content";
- byte [] buffer = fileContent.getBytes();
+ byte [] buffer = fileContent.getBytes(UTF_8);
RecordingOutputStream ros = writeTestStream(
buffer,1,
"testReplayCharSequenceByteToString.txt",0);
@@ -179,7 +183,7 @@ public void testSingleByteEncodings() throws IOException {
String latin1String = new String(bytes, "latin1");
RecordingOutputStream ros = writeTestStream(
bytes, 1, "testSingleByteEncodings-latin1.txt", 0);
- ReplayCharSequence rcs = getReplayCharSequence(ros,Charsets.ISO_8859_1);
+ ReplayCharSequence rcs = getReplayCharSequence(ros, ISO_8859_1);
String result = rcs.toString();
logger.fine("latin1[0] " + toHexString(latin1String));
logger.fine("latin1[1] " + toHexString(result));
@@ -207,7 +211,7 @@ public void testSingleByteEncodings() throws IOException {
@Test
public void testReplayCharSequenceByteToStringOverflow() throws IOException {
String fileContent = "Some file content. "; // ascii
- byte [] buffer = fileContent.getBytes();
+ byte [] buffer = fileContent.getBytes(UTF_8);
RecordingOutputStream ros = writeTestStream(
buffer,1,
"testReplayCharSequenceByteToStringOverflow.txt",1);
@@ -217,8 +221,8 @@ public void testReplayCharSequenceByteToStringOverflow() throws IOException {
// both encodings because they exercise different code paths. UTF-8 is
// decoded to UTF-16 while windows-1252 is memory mapped directly. See
// GenericReplayCharSequence
- ReplayCharSequence rcsUtf8 = getReplayCharSequence(ros,Charsets.UTF_8);
- ReplayCharSequence rcs1252 = getReplayCharSequence(ros,Charset.forName("windows-1252"));
+ ReplayCharSequence rcsUtf8 = getReplayCharSequence(ros, UTF_8);
+ ReplayCharSequence rcs1252 = getReplayCharSequence(ros, Charset.forName("windows-1252"));
String result = rcsUtf8.toString();
assertEquals(expectedContent, result, "Strings don't match");
@@ -242,7 +246,7 @@ public void testReplayCharSequenceByteToStringMulti() throws IOException {
buffer,1,
"testReplayCharSequenceByteToStringMulti.txt",MULTIPLICAND-1);
for (int i = 0; i < 3; i++) {
- ReplayCharSequence rcs = getReplayCharSequence(ros,StandardCharsets.UTF_8);
+ ReplayCharSequence rcs = getReplayCharSequence(ros, UTF_8);
String result = rcs.toString();
assertEquals(result, expectedResult, "Strings don't match");
rcs.close();
@@ -255,8 +259,7 @@ public void testReplayCharSequenceByteToStringMulti() throws IOException {
@Disabled
public void xestHugeReplayCharSequence() throws IOException {
String fileContent = "01234567890123456789";
- String characterEncoding = "ascii";
- byte[] buffer = fileContent.getBytes(characterEncoding);
+ byte[] buffer = fileContent.getBytes(US_ASCII);
long reps = (long) Integer.MAX_VALUE / (long) buffer.length + 1000000l;
@@ -264,7 +267,7 @@ public void xestHugeReplayCharSequence() throws IOException {
+ " bytes to testHugeReplayCharSequence.txt");
RecordingOutputStream ros = writeTestStream(buffer, 0,
"testHugeReplayCharSequence.txt", reps);
- ReplayCharSequence rcs = getReplayCharSequence(ros,Charset.forName(characterEncoding));
+ ReplayCharSequence rcs = getReplayCharSequence(ros, US_ASCII);
if (reps * fileContent.length() > (long) Integer.MAX_VALUE) {
assertEquals(Integer.MAX_VALUE, rcs.length(), "ReplayCharSequence has wrong length (length()="
@@ -283,7 +286,7 @@ public void xestHugeReplayCharSequence() throws IOException {
// NumberFormat.getInstance().format(index));
assertEquals(fileContent.charAt(index % fileContent.length()),
rcs.charAt(index), "Characters don't match (index="
- + NumberFormat.getInstance().format(index) + ")");
+ + NumberFormat.getInstance(Locale.ROOT).format(index) + ")");
}
// check that out of bounds indices throw exception
@@ -307,7 +310,7 @@ public void xestHugeReplayCharSequence() throws IOException {
// NumberFormat.getInstance().format(index));
assertEquals(fileContent.charAt(index % fileContent.length()),
rcs.charAt(index), "Characters don't match (index="
- + NumberFormat.getInstance().format(index) + ")");
+ + NumberFormat.getInstance(Locale.ROOT).format(index) + ")");
}
}
diff --git a/src/test/java/org/archive/io/RepositionableInputStreamTest.java b/src/test/java/org/archive/io/RepositionableInputStreamTest.java
index 228c9042..4aad11b9 100644
--- a/src/test/java/org/archive/io/RepositionableInputStreamTest.java
+++ b/src/test/java/org/archive/io/RepositionableInputStreamTest.java
@@ -21,12 +21,15 @@
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
+import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
import static org.junit.jupiter.api.Assertions.assertEquals;
public class RepositionableInputStreamTest {
@@ -38,7 +41,7 @@ public class RepositionableInputStreamTest {
@BeforeEach
protected void setUp() throws Exception {
this.testFile = new File(tempDir, this.getClass().getName());
- PrintWriter pw = new PrintWriter(new FileOutputStream(testFile));
+ PrintWriter pw = new PrintWriter(new OutputStreamWriter(new FileOutputStream(testFile), UTF_8));
for (int i = 0; i < 100; i++) {
pw.print(LINE);
}
@@ -63,7 +66,7 @@ public void testname() throws Exception {
long offset = 0;
for (int i = 0; i < 10; i++) {
ris.read(bytes, 0, LINE.length());
- assertEquals(LINE, new String(bytes));
+ assertEquals(LINE, new String(bytes, UTF_8));
offset += LINE.length();
assertEquals(offset, ris.position());
}
diff --git a/src/test/java/org/archive/io/arc/ARCWriterPoolTest.java b/src/test/java/org/archive/io/arc/ARCWriterPoolTest.java
index 954da636..f6820337 100644
--- a/src/test/java/org/archive/io/arc/ARCWriterPoolTest.java
+++ b/src/test/java/org/archive/io/arc/ARCWriterPoolTest.java
@@ -30,6 +30,8 @@
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.archive.format.arc.ARCConstants.*;
@@ -51,7 +53,7 @@ public void testARCWriterPool()
WriterPoolMember [] writers = new WriterPoolMember[MAX_ACTIVE];
final String CONTENT = "Any old content";
ByteArrayOutputStream baos = new ByteArrayOutputStream();
- baos.write(CONTENT.getBytes());
+ baos.write(CONTENT.getBytes(UTF_8));
for (int i = 0; i < MAX_ACTIVE; i++) {
writers[i] = pool.borrowFile();
assertEquals(i + 1, pool.getNumActive(), "Number active");
@@ -81,7 +83,7 @@ public void testInvalidate() throws Exception {
WriterPoolMember [] writers = new WriterPoolMember[MAX_ACTIVE];
final String CONTENT = "Any old content";
ByteArrayOutputStream baos = new ByteArrayOutputStream();
- baos.write(CONTENT.getBytes());
+ baos.write(CONTENT.getBytes(UTF_8));
for (int i = 0; i < MAX_ACTIVE; i++) {
writers[i] = pool.borrowFile();
assertEquals(i + 1, pool.getNumActive(), "Number active");
@@ -124,4 +126,4 @@ private WriterPoolSettings getSettings(final boolean isCompressed) {
Arrays.asList(files),
null);
}
-}
\ No newline at end of file
+}
diff --git a/src/test/java/org/archive/io/arc/ARCWriterTest.java b/src/test/java/org/archive/io/arc/ARCWriterTest.java
index ca300697..f6c48462 100644
--- a/src/test/java/org/archive/io/arc/ARCWriterTest.java
+++ b/src/test/java/org/archive/io/arc/ARCWriterTest.java
@@ -47,6 +47,8 @@
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
import static org.junit.jupiter.api.Assertions.*;
import static org.archive.format.arc.ARCConstants.*;
@@ -122,11 +124,11 @@ protected int writeRandomHTTPRecord(ARCWriter arcWriter, int index)
// Start the record with an arbitrary 14-digit date per RFC2540
String now = ArchiveUtils.get14DigitDate();
int recordLength = 0;
- byte[] record = (getContent(indexStr)).getBytes();
+ byte[] record = (getContent(indexStr)).getBytes(UTF_8);
recordLength += record.length;
baos.write(record);
// Add the newline between records back in
- baos.write("\n".getBytes());
+ baos.write("\n".getBytes(UTF_8));
recordLength += 1;
arcWriter.write("http://www.one.net/id=" + indexStr, "text/html",
"0.1.2.3", Long.parseLong(now), recordLength, baos);
@@ -260,7 +262,7 @@ public void testWriteRecordCompressed() throws IOException {
}
public void testWriteGiantRecord() throws IOException {
- PrintStream dummyStream = new PrintStream(new NullOutputStream());
+ PrintStream dummyStream = new PrintStream(new NullOutputStream(), false, UTF_8.name());
ARCWriter arcWriter =
new ARCWriter(
SERIAL_NO,
@@ -305,7 +307,7 @@ protected CorruptibleARCWriter createARCWriter(String name, boolean compress) {
protected static ByteArrayInputStream getBais(String str)
throws IOException {
- return new ByteArrayInputStream(str.getBytes());
+ return new ByteArrayInputStream(str.getBytes(UTF_8));
}
/**
@@ -417,7 +419,7 @@ protected void lengthTooShort(String name, boolean compress, boolean strict)
ByteArrayInputStream bais = getBais(content+"SOME TRAILING BYTES");
writeRecord(writer, SOME_URL, "text/html",
content.length(), bais);
- writer.setEndJunk("SOME TRAILING BYTES".getBytes());
+ writer.setEndJunk("SOME TRAILING BYTES".getBytes(UTF_8));
writeRecord(writer, SOME_URL, "text/html",
content.length(), getBais(content));
} finally {
@@ -429,7 +431,7 @@ protected void lengthTooShort(String name, boolean compress, boolean strict)
PrintStream origErr = System.err;
ARCReader r = null;
try {
- System.setErr(new PrintStream(os));
+ System.setErr(new PrintStream(os, false, UTF_8.name()));
r = ARCReaderFactory.get(writer.getFile());
r.setStrict(strict);
@@ -438,7 +440,7 @@ protected void lengthTooShort(String name, boolean compress, boolean strict)
// Make sure we get the warning string which complains about the
// trailing bytes.
- String err = os.toString();
+ String err = os.toString(UTF_8.name());
assertTrue(err.startsWith("WARNING") &&
(err.indexOf("Record STARTING at") > 0), "No message " + err);
r.close();
@@ -494,7 +496,7 @@ protected void lengthTooLong(String name, boolean compress,
PrintStream origErr = System.err;
ARCReader r = null;
try {
- System.setErr(new PrintStream(os));
+ System.setErr(new PrintStream(os, false, UTF_8.name()));
r = ARCReaderFactory.get(writer.getFile());
r.setStrict(strict);
@@ -503,7 +505,7 @@ protected void lengthTooLong(String name, boolean compress,
// Make sure we get the warning string which complains about the
// trailing bytes.
- String err = os.toString();
+ String err = os.toString(UTF_8.name());
assertTrue(err.startsWith("WARNING Premature EOF before end-of-record"),
"No message " + err);
} finally {
@@ -518,7 +520,7 @@ public void testGapError() throws IOException {
String content = getContent();
// Make a 'weird' RIS that returns bad 'remaining' length
// awhen remaining should be 0
- ReplayInputStream ris = new ReplayInputStream(content.getBytes(),
+ ReplayInputStream ris = new ReplayInputStream(content.getBytes(UTF_8),
content.length(), null) {
public long remaining() {
return (super.remaining()==0) ? -1 : super.remaining();
diff --git a/src/test/java/org/archive/io/warc/WARCWriterTest.java b/src/test/java/org/archive/io/warc/WARCWriterTest.java
index c0ace5f0..d2684fa4 100644
--- a/src/test/java/org/archive/io/warc/WARCWriterTest.java
+++ b/src/test/java/org/archive/io/warc/WARCWriterTest.java
@@ -42,6 +42,8 @@
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
import static org.junit.jupiter.api.Assertions.*;
import static org.archive.format.warc.WARCConstants.*;
@@ -228,7 +230,7 @@ protected int writeRandomHTTPRecord(WARCWriter w, int index)
String indexStr = Integer.toString(index);
recordInfo.setUrl("http://www.one.net/id=" + indexStr);
- byte[] record = (getContent(indexStr)).getBytes();
+ byte[] record = (getContent(indexStr)).getBytes(UTF_8);
recordInfo.setContentLength((long) record.length);
ByteArrayOutputStream baos = new ByteArrayOutputStream();
@@ -385,7 +387,7 @@ protected WARCWriter createWARCWriter(String name,
protected static ByteArrayOutputStream getBaos(String str)
throws IOException {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
- baos.write(str.getBytes());
+ baos.write(str.getBytes(UTF_8));
return baos;
}
@@ -524,4 +526,4 @@ public void testArcRecordOffsetReads() throws Exception {
assertTrue(totalRead > 0);
}
}
-}
\ No newline at end of file
+}
diff --git a/src/test/java/org/archive/resource/html/ExtractingParseObserverTest.java b/src/test/java/org/archive/resource/html/ExtractingParseObserverTest.java
index 157499ff..e34d4e6f 100644
--- a/src/test/java/org/archive/resource/html/ExtractingParseObserverTest.java
+++ b/src/test/java/org/archive/resource/html/ExtractingParseObserverTest.java
@@ -3,6 +3,7 @@
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
+import java.util.Locale;
import java.util.logging.Logger;
import org.archive.extract.ExtractingResourceFactoryMapper;
@@ -52,7 +53,7 @@ public void testHandleStyleNodeExceptions() throws Exception {
TextNode tn = new TextNode(css);
epo.handleStyleNode(tn);
} catch(Exception e) {
- System.err.format("And the winner is....(%s)\n", css);
+ System.err.format(Locale.ROOT, "And the winner is....(%s)\n", css);
e.printStackTrace();
except = true;
throw e;
diff --git a/src/test/java/org/archive/resource/html/HTMLMetaDataTest.java b/src/test/java/org/archive/resource/html/HTMLMetaDataTest.java
index 3b4193b9..a3c8c1c9 100644
--- a/src/test/java/org/archive/resource/html/HTMLMetaDataTest.java
+++ b/src/test/java/org/archive/resource/html/HTMLMetaDataTest.java
@@ -1,5 +1,7 @@
package org.archive.resource.html;
+import java.util.Locale;
+
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
@@ -59,7 +61,7 @@ private void appendStrArr(JSONObject o, String a[][]) throws JSONException {
}
private void appendStrArr2(JSONObject o, String k, String... a) throws JSONException {
- System.out.format("A length(%d)\n", a.length);
+ System.out.format(Locale.ROOT, "A length(%d)\n", a.length);
JSONObject n = new JSONObject();
if((a.length & 1) == 1) {
throw new IllegalArgumentException();
diff --git a/src/test/java/org/archive/url/BasicURLCanonicalizerTest.java b/src/test/java/org/archive/url/BasicURLCanonicalizerTest.java
index 19b1984f..45989416 100644
--- a/src/test/java/org/archive/url/BasicURLCanonicalizerTest.java
+++ b/src/test/java/org/archive/url/BasicURLCanonicalizerTest.java
@@ -1,6 +1,7 @@
package org.archive.url;
import java.net.URISyntaxException;
+import java.util.Locale;
import org.junit.jupiter.api.Test;
@@ -204,12 +205,12 @@ public void testFoo() {
String path = "/a/b/c/";
String[] paths = path.split("/",-1);
for(String p : paths) {
- System.out.format("(%s)",p);
+ System.out.format(Locale.ROOT, "(%s)", p);
}
System.out.println();
paths = path.split("/");
for(String p : paths) {
- System.out.format("(%s)",p);
+ System.out.format(Locale.ROOT, "(%s)", p);
}
System.out.println();
}
diff --git a/src/test/java/org/archive/url/URLParserTest.java b/src/test/java/org/archive/url/URLParserTest.java
index bc8fc3a5..c942a260 100644
--- a/src/test/java/org/archive/url/URLParserTest.java
+++ b/src/test/java/org/archive/url/URLParserTest.java
@@ -3,10 +3,14 @@
import java.io.UnsupportedEncodingException;
import java.net.URISyntaxException;
import java.net.URLDecoder;
+import java.util.Locale;
import com.google.common.net.InetAddresses;
+
import org.junit.jupiter.api.Test;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
import static org.junit.jupiter.api.Assertions.assertEquals;
public class URLParserTest {
@@ -15,7 +19,7 @@ public void testGuava() throws URIException, UnsupportedEncodingException {
Long l = Long.parseLong("3279880203");
int i2 = l.intValue();
// int i = Integer.decode("3279880203");
- System.err.format("FromNum(%s)\n", InetAddresses.fromInteger(i2).getHostAddress());
+ System.err.format(Locale.ROOT, "FromNum(%s)\n", InetAddresses.fromInteger(i2).getHostAddress());
}
@Test
@@ -30,7 +34,7 @@ public void testAddDefaultSchemeIfNeeded() {
@Test
public void testParse() throws UnsupportedEncodingException, URISyntaxException {
- System.out.format("O(%s) E(%s)\n","%66",URLDecoder.decode("%66","UTF-8"));
+ System.out.format(Locale.ROOT, "O(%s) E(%s)\n","%66", URLDecoder.decode("%66", UTF_8.name()));
checkParse("http://www.archive.org/index.html#foo",
null, "http", null, null, "www.archive.org", -1, "/index.html", null, "foo",
"http://www.archive.org/index.html#foo", "/index.html");
@@ -96,7 +100,7 @@ private void checkParse(String s, String opaque, String scheme, String authUser,
String authPass, String host, int port, String path,
String query, String fragment, String urlString, String pathQuery) throws URISyntaxException {
HandyURL h = URLParser.parse(s);
- System.out.format("Input:(%s)\nHandyURL\t%s\n",s,h.toDebugString());
+ System.out.format(Locale.ROOT, "Input:(%s)\nHandyURL\t%s\n", s, h.toDebugString());
assertEquals(scheme, h.getScheme());
assertEquals(authUser, h.getAuthUser());
assertEquals(authPass, h.getAuthPass());
diff --git a/src/test/java/org/archive/url/URLRegexTransformerTest.java b/src/test/java/org/archive/url/URLRegexTransformerTest.java
index 73c43f96..d5c98f6a 100644
--- a/src/test/java/org/archive/url/URLRegexTransformerTest.java
+++ b/src/test/java/org/archive/url/URLRegexTransformerTest.java
@@ -5,6 +5,8 @@
import static org.junit.jupiter.api.Assertions.assertEquals;
+import java.util.Locale;
+
public class URLRegexTransformerTest {
@Test
@@ -49,7 +51,7 @@ public void testStripPathSessionID() {
private static void checkStripPathSessionID(String orig, String want) {
String got = URLRegexTransformer.stripPathSessionID(orig);
- assertEquals(want, got, String.format("FAIL Orig(%s) Got(%s) Want(%s)", orig, got, want));
+ assertEquals(want, got, String.format(Locale.ROOT, "FAIL Orig(%s) Got(%s) Want(%s)", orig, got, want));
}
// private static final String BASE = "http://www.archive.org/index.html";
diff --git a/src/test/java/org/archive/util/ByteOpTest.java b/src/test/java/org/archive/util/ByteOpTest.java
index 49781c36..eb89353e 100644
--- a/src/test/java/org/archive/util/ByteOpTest.java
+++ b/src/test/java/org/archive/util/ByteOpTest.java
@@ -4,6 +4,7 @@
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.IOException;
+import java.util.Locale;
import com.google.common.io.LittleEndianDataOutputStream;
@@ -18,10 +19,10 @@ public void testReadShort() throws IOException {
byte a[] = new byte[]{0,1,2,3};
ByteArrayInputStream bais = new ByteArrayInputStream(a);
int bos = ByteOp.readShort(bais);
- System.out.format("BO.Read short(%d)\n", bos);
+ System.out.format(Locale.ROOT, "BO.Read short(%d)\n", bos);
DataInputStream dis = new DataInputStream(new ByteArrayInputStream(a));
int disv = dis.readUnsignedShort();
- System.out.format("DI.Read short(%d)\n", disv);
+ System.out.format(Locale.ROOT, "DI.Read short(%d)\n", disv);
for(int i = 0; i < 256 * 256; i++) {
ByteArrayOutputStream baos = new ByteArrayOutputStream(2);
LittleEndianDataOutputStream dos = new LittleEndianDataOutputStream(baos);
diff --git a/src/test/java/org/archive/util/CrossProductTest.java b/src/test/java/org/archive/util/CrossProductTest.java
index 211fa65e..a487ab15 100644
--- a/src/test/java/org/archive/util/CrossProductTest.java
+++ b/src/test/java/org/archive/util/CrossProductTest.java
@@ -2,10 +2,12 @@
import java.util.ArrayList;
import java.util.List;
+import java.util.Locale;
import org.junit.jupiter.api.Test;
public class CrossProductTest {
+
private void dumpC(List