Skip to content

Commit d241d36

Browse files
committed
Add PageTitle class to canonicalize page names
1 parent 691acbf commit d241d36

File tree

12 files changed

+234
-50
lines changed

12 files changed

+234
-50
lines changed
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
package fr.free.nrw.commons;
2+
3+
import android.support.test.runner.AndroidJUnit4;
4+
5+
import org.junit.Assert;
6+
import org.junit.Test;
7+
import org.junit.runner.RunWith;
8+
9+
import static org.hamcrest.CoreMatchers.is;
10+
11+
// TODO: use Robolectric and make it runnable without a connected device
12+
@RunWith(AndroidJUnit4.class)
13+
public class MediaTest {
14+
@Test public void displayTitleShouldStripExtension() {
15+
Media m = new Media("File:Example.jpg");
16+
Assert.assertThat(m.getDisplayTitle(), is("Example"));
17+
}
18+
19+
@Test public void displayTitleShouldUseSpaceForUnderscore() {
20+
Media m = new Media("File:Example 1_2.jpg");
21+
Assert.assertThat(m.getDisplayTitle(), is("Example 1 2"));
22+
}
23+
}
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
package fr.free.nrw.commons;
2+
3+
import android.support.test.runner.AndroidJUnit4;
4+
5+
import org.junit.Assert;
6+
import org.junit.Test;
7+
import org.junit.runner.RunWith;
8+
9+
import java.net.URLEncoder;
10+
11+
import static org.hamcrest.CoreMatchers.is;
12+
13+
// TODO: use Robolectric and make it runnable without a connected device
14+
@RunWith(AndroidJUnit4.class)
15+
public class PageTitleTest {
16+
@Test public void displayTextShouldNotBeUnderscored() {
17+
Assert.assertThat(new PageTitle("Ex_1 ").getDisplayText(),
18+
is("Ex 1"));
19+
}
20+
21+
@Test public void moreThanTwoColons() {
22+
Assert.assertThat(new PageTitle("File:sample:a.jpg").getPrefixedText(),
23+
is("File:Sample:a.jpg"));
24+
}
25+
26+
@Test public void getTextShouldReturnWithoutNamespace() {
27+
Assert.assertThat(new PageTitle("File:sample.jpg").getText(),
28+
is("Sample.jpg"));
29+
}
30+
31+
32+
@Test public void capitalizeNameAfterNamespace() {
33+
Assert.assertThat(new PageTitle("File:sample.jpg").getPrefixedText(),
34+
is("File:Sample.jpg"));
35+
}
36+
37+
@Test public void prefixedTextShouldBeUnderscored() {
38+
Assert.assertThat(new PageTitle("Ex 1 ").getPrefixedText(),
39+
is("Ex_1"));
40+
}
41+
42+
@Test public void getMobileUriForTest() {
43+
Assert.assertThat(new PageTitle("Test").getMobileUri().toString(),
44+
is("https://commons.m.wikimedia.org/wiki/Test"));
45+
}
46+
47+
@Test public void spaceBecomesUnderscoreInUri() {
48+
Assert.assertThat(new PageTitle("File:Ex 1.jpg").getCanonicalUri().toString(),
49+
is("https://commons.wikimedia.org/wiki/File:Ex_1.jpg"));
50+
}
51+
52+
@Test public void leaveSubpageNamesUncapitalizedInUri() {
53+
Assert.assertThat(new PageTitle("User:Ex/subpage").getCanonicalUri().toString(),
54+
is("https://commons.wikimedia.org/wiki/User:Ex/subpage"));
55+
}
56+
57+
@Test public void unicodeUri() throws Throwable {
58+
Assert.assertThat(new PageTitle("User:例").getCanonicalUri().toString(),
59+
is("https://commons.wikimedia.org/wiki/User:" + URLEncoder.encode("例", "utf-8")));
60+
}
61+
}

app/src/main/java/fr/free/nrw/commons/LicenseList.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import android.app.Activity;
44
import android.content.res.Resources;
5+
import android.support.annotation.Nullable;
56

67
import org.xmlpull.v1.XmlPullParser;
78

@@ -42,10 +43,11 @@ public License get(String key) {
4243
return licenses.get(key);
4344
}
4445

46+
@Nullable
4547
public License licenseForTemplate(String template) {
46-
String ucTemplate = Utils.capitalize(template);
48+
String ucTemplate = new PageTitle(template).getDisplayText();
4749
for (License license : values()) {
48-
if (ucTemplate.equals(Utils.capitalize(license.getTemplate()))) {
50+
if (ucTemplate.equals(new PageTitle(license.getTemplate()).getDisplayText())) {
4951
return license;
5052
}
5153
}

app/src/main/java/fr/free/nrw/commons/Media.java

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ public String getDisplayTitle() {
4747
return "";
4848
}
4949
// FIXME: Gross hack because my regex skills suck maybe or I am too lazy who knows
50-
String title = filename.replaceFirst("^File:", "");
50+
String title = getFilePageTitle().getDisplayText().replaceFirst("^File:", "");
5151
Matcher matcher = displayTitlePattern.matcher(title);
5252
if(matcher.matches()) {
5353
return matcher.group(1);
@@ -56,13 +56,8 @@ public String getDisplayTitle() {
5656
}
5757
}
5858

59-
public String getDescriptionUrl() {
60-
// HACK! Geez
61-
return CommonsApplication.HOME_URL + "File:" + Utils.urlEncode(getFilename().replace("File:", "").replace(" ", "_"));
62-
}
63-
64-
public String getMobileDescriptionUrl() {
65-
return CommonsApplication.MOBILE_HOME_URL + "File:" + Utils.urlEncode(getFilename().replace("File:", "").replace(" ", "_"));
59+
public PageTitle getFilePageTitle() {
60+
return new PageTitle("File:" + getFilename().replaceFirst("^File:", ""));
6661
}
6762

6863
public Uri getLocalUri() {

app/src/main/java/fr/free/nrw/commons/MediaDataExtractor.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -163,14 +163,14 @@ private void processWikiParseTree(String source) throws IOException {
163163
}
164164
}
165165

166-
private Node findTemplate(Element parentNode, String title) throws IOException {
167-
String ucTitle= Utils.capitalize(title);
166+
private Node findTemplate(Element parentNode, String title_) throws IOException {
167+
String title= new PageTitle(title_).getDisplayText();
168168
NodeList nodes = parentNode.getChildNodes();
169169
for (int i = 0, length = nodes.getLength(); i < length; i++) {
170170
Node node = nodes.item(i);
171171
if (node.getNodeName().equals("template")) {
172172
String foundTitle = getTemplateTitle(node);
173-
if (Utils.capitalize(foundTitle).equals(ucTitle)) {
173+
if (title.equals(new PageTitle(foundTitle).getDisplayText())) {
174174
return node;
175175
}
176176
}
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
package fr.free.nrw.commons;
2+
3+
import android.net.Uri;
4+
import android.support.annotation.NonNull;
5+
6+
public class PageTitle {
7+
private final String namespace;
8+
private final String titleKey;
9+
10+
/**
11+
* Construct from a namespace-prefixed page name.
12+
* @param prefixedText namespace-prefixed page name
13+
*/
14+
public PageTitle(@NonNull String prefixedText) {
15+
String[] segments = prefixedText.trim().replace(" ", "_").split(":", 2);
16+
17+
// canonicalize and capitalize page title as done by MediaWiki
18+
if (segments.length == 2) {
19+
// TODO: canonicalize and capitalize namespace as well
20+
// see https://www.mediawiki.org/wiki/Manual:Title.php#Canonical_forms
21+
namespace = segments[0];
22+
titleKey = Utils.capitalize(segments[1]);
23+
} else {
24+
namespace = "";
25+
titleKey = Utils.capitalize(segments[0]);
26+
}
27+
}
28+
29+
/**
30+
* Get the canonical, underscored title for DB and URLs (such as "File:My_example.jpg").
31+
*
32+
* @return canonical title
33+
*/
34+
@NonNull
35+
public String getPrefixedText() {
36+
if (namespace.isEmpty()) {
37+
return titleKey;
38+
} else {
39+
return namespace + ":" + titleKey;
40+
}
41+
}
42+
43+
/**
44+
* Get the canonicalized title for displaying, with spaces instead of underscores (such as "File:My example.jpg").
45+
*
46+
* @return canonical title
47+
*/
48+
@NonNull
49+
public String getDisplayText() {
50+
return getPrefixedText().replace("_", " ");
51+
}
52+
53+
/**
54+
* Convert to a URI
55+
* (such as "https://commons.wikimedia.org/wiki/File:My_example.jpg").
56+
*
57+
* @return URI
58+
*/
59+
@NonNull
60+
public Uri getCanonicalUri() {
61+
String uriStr = CommonsApplication.HOME_URL + Uri.encode(getPrefixedText(), ":/");
62+
return Uri.parse(uriStr);
63+
}
64+
65+
66+
/**
67+
* Convert to a mobile URI
68+
* (such as "https://commons.m.wikimedia.org/wiki/File:My_example.jpg").
69+
*
70+
* @return URI
71+
*/
72+
@NonNull
73+
public Uri getMobileUri() {
74+
String uriStr = CommonsApplication.MOBILE_HOME_URL + Uri.encode(getPrefixedText(), ":/");
75+
return Uri.parse(uriStr);
76+
}
77+
78+
/**
79+
* Get the canonical title without namespace.
80+
* @return title
81+
*/
82+
@NonNull
83+
public String getText() {
84+
return titleKey;
85+
}
86+
87+
@Override
88+
public String toString() {
89+
return getPrefixedText();
90+
}
91+
}

app/src/main/java/fr/free/nrw/commons/Utils.java

Lines changed: 10 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,24 @@
11
package fr.free.nrw.commons;
22

33
import android.content.Context;
4-
import android.net.Uri;
54
import android.os.Build;
65
import android.preference.PreferenceManager;
76
import android.text.Html;
87
import android.text.Spanned;
98

10-
import fr.free.nrw.commons.settings.Prefs;
11-
import timber.log.Timber;
9+
import org.apache.commons.codec.binary.Hex;
10+
import org.apache.commons.codec.digest.DigestUtils;
11+
import org.w3c.dom.Node;
12+
import org.xmlpull.v1.XmlPullParser;
13+
import org.xmlpull.v1.XmlPullParserException;
1214

1315
import java.io.BufferedInputStream;
1416
import java.io.IOException;
1517
import java.io.InputStream;
1618
import java.io.StringWriter;
1719
import java.io.UnsupportedEncodingException;
1820
import java.math.BigInteger;
21+
import java.net.URLEncoder;
1922
import java.security.MessageDigest;
2023
import java.security.NoSuchAlgorithmException;
2124
import java.text.ParseException;
@@ -34,12 +37,8 @@
3437
import javax.xml.transform.dom.DOMSource;
3538
import javax.xml.transform.stream.StreamResult;
3639

37-
import org.apache.commons.codec.binary.Hex;
38-
import org.apache.commons.codec.digest.DigestUtils;
39-
import org.apache.commons.codec.net.URLCodec;
40-
import org.w3c.dom.Node;
41-
import org.xmlpull.v1.XmlPullParser;
42-
import org.xmlpull.v1.XmlPullParserException;
40+
import fr.free.nrw.commons.settings.Prefs;
41+
import timber.log.Timber;
4342

4443

4544
public class Utils {
@@ -126,7 +125,7 @@ public static String toMWDate(Date date) {
126125
}
127126

128127
public static String makeThumbBaseUrl(String filename) {
129-
String name = filename.replaceFirst("File:", "").replace(" ", "_");
128+
String name = new PageTitle(filename).getPrefixedText();
130129
String sha = new String(Hex.encodeHex(DigestUtils.md5(name)));
131130
return String.format("%s/%s/%s/%s", CommonsApplication.IMAGE_URL_BASE, sha.substring(0, 1), sha.substring(0, 2), urlEncode(name));
132131
}
@@ -153,11 +152,9 @@ public static String getStringFromDOM(Node dom) {
153152
return outputStream.toString();
154153
}
155154

156-
private static final URLCodec urlCodec = new URLCodec();
157-
158155
public static String urlEncode(String url) {
159156
try {
160-
return urlCodec.encode(url, "utf-8");
157+
return URLEncoder.encode(url, "utf-8");
161158
} catch (UnsupportedEncodingException e) {
162159
throw new RuntimeException(e);
163160
}
@@ -232,12 +229,6 @@ public static String licenseUrlFor(String license) {
232229
throw new RuntimeException("Unrecognized license value: " + license);
233230
}
234231

235-
public static Uri uriForWikiPage(String name) {
236-
String underscored = name.trim().replace(" ", "_");
237-
String uriStr = CommonsApplication.HOME_URL + urlEncode(underscored);
238-
return Uri.parse(uriStr);
239-
}
240-
241232
/**
242233
* Fast-forward an XmlPullParser to the next instance of the given element
243234
* in the input stream (namespaced).

app/src/main/java/fr/free/nrw/commons/auth/LoginActivity.java

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,13 @@
1515
import android.widget.Button;
1616
import android.widget.EditText;
1717
import android.widget.TextView;
18-
1918
import android.widget.Toast;
20-
import fr.free.nrw.commons.*;
19+
20+
import fr.free.nrw.commons.BuildConfig;
21+
import fr.free.nrw.commons.CommonsApplication;
22+
import fr.free.nrw.commons.PageTitle;
23+
import fr.free.nrw.commons.R;
24+
import fr.free.nrw.commons.WelcomeActivity;
2125
import fr.free.nrw.commons.contributions.ContributionsActivity;
2226
import timber.log.Timber;
2327

@@ -158,7 +162,7 @@ private LoginTask getLoginTask() {
158162
* @return String canonical username
159163
*/
160164
private String canonicializeUsername( String username ) {
161-
return Utils.capitalize(username.substring(0,1)) + username.substring(1);
165+
return new PageTitle(username).getText();
162166
}
163167

164168
@Override

app/src/main/java/fr/free/nrw/commons/contributions/UploadCountClient.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import java.net.URL;
1010
import java.util.Locale;
1111

12-
import fr.free.nrw.commons.CommonsApplication;
12+
import fr.free.nrw.commons.PageTitle;
1313
import fr.free.nrw.commons.concurrency.BackgroundPoolExceptionHandler;
1414
import fr.free.nrw.commons.concurrency.ThreadPoolExecutorService;
1515
import timber.log.Timber;
@@ -34,7 +34,8 @@ public ListenableFuture<Integer> getUploadCount(final String userName) {
3434
public void run() {
3535
URL url;
3636
try {
37-
url = new URL(String.format(Locale.ENGLISH, UPLOAD_COUNT_URL_TEMPLATE, userName));
37+
url = new URL(String.format(Locale.ENGLISH, UPLOAD_COUNT_URL_TEMPLATE,
38+
new PageTitle(userName).getText()));
3839
HttpURLConnection urlConnection = (HttpURLConnection) url.openConnection();
3940
try {
4041
BufferedReader bufferedReader = new BufferedReader(new

0 commit comments

Comments
 (0)