Skip to content

Commit c6794f3

Browse files
authored
Backend overhaul fetch media by filename (commons-app#3081)
* Added class MwParseResponse and MwParseResult for receiving parse output * Migrated fetchMediaByFilename to retrofit * Removed unused code * Added tests
1 parent 728ead7 commit c6794f3

11 files changed

+94
-96
lines changed

app/src/main/java/fr/free/nrw/commons/MediaDataExtractor.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,7 @@ public Single<Media> getMediaFromFileName(String filename) {
6666
* @return
6767
*/
6868
private Single<String> getDiscussion(String filename) {
69-
return mediaWikiApi.fetchMediaByFilename(filename.replace("File", "File talk"))
70-
.flatMap(mediaResult -> mediaWikiApi.parseWikicode(mediaResult.getWikiSource()))
69+
return mediaClient.getPageHtml(filename.replace("File", "File talk"))
7170
.map(discussion -> HtmlCompat.fromHtml(discussion, HtmlCompat.FROM_HTML_MODE_LEGACY).toString())
7271
.onErrorReturn(throwable -> {
7372
Timber.e(throwable, "Error occurred while fetching discussion");

app/src/main/java/fr/free/nrw/commons/media/MediaClient.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,4 +161,13 @@ public Single<Media> getPictureOfTheDay() {
161161
.map(Media::from)
162162
.single(Media.EMPTY);
163163
}
164+
165+
@NonNull
166+
public Single<String> getPageHtml(String title){
167+
return mediaInterface.getPageHtml(title)
168+
.filter(MwParseResponse::success)
169+
.map(MwParseResponse::parse)
170+
.map(MwParseResult::text)
171+
.first("");
172+
}
164173
}

app/src/main/java/fr/free/nrw/commons/media/MediaInterface.java

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@
1414
* Interface for interacting with Commons media related APIs
1515
*/
1616
public interface MediaInterface {
17+
String MEDIA_PARAMS="&prop=imageinfo&iiprop=url|extmetadata&iiurlwidth=640" +
18+
"&iiextmetadatafilter=DateTime|Categories|GPSLatitude|GPSLongitude|ImageDescription|DateTimeOriginal" +
19+
"|Artist|LicenseShortName|LicenseUrl";
1720
/**
1821
* Checks if a page exists or not.
1922
*
@@ -42,9 +45,7 @@ public interface MediaInterface {
4245
*/
4346
@GET("w/api.php?action=query&format=json&formatversion=2" + //Basic parameters
4447
"&generator=categorymembers&gcmtype=file&gcmsort=timestamp&gcmdir=desc" + //Category parameters
45-
"&prop=imageinfo&iiprop=url|extmetadata&iiurlwidth=640" + //Media property parameters
46-
"&iiextmetadatafilter=DateTime|Categories|GPSLatitude|GPSLongitude|ImageDescription|DateTimeOriginal" +
47-
"|Artist|LicenseShortName|LicenseUrl")
48+
MEDIA_PARAMS)
4849
Observable<MwQueryResponse> getMediaListFromCategory(@Query("gcmtitle") String category, @Query("gcmlimit") int itemLimit, @QueryMap Map<String, String> continuation);
4950

5051
/**
@@ -57,9 +58,7 @@ public interface MediaInterface {
5758
*/
5859
@GET("w/api.php?action=query&format=json&formatversion=2" + //Basic parameters
5960
"&generator=search&gsrwhat=text&gsrnamespace=6" + //Search parameters
60-
"&prop=imageinfo&iiprop=url|extmetadata&iiurlwidth=640" + //Media property parameters
61-
"&iiextmetadatafilter=DateTime|Categories|GPSLatitude|GPSLongitude|ImageDescription|DateTimeOriginal" +
62-
"|Artist|LicenseShortName|LicenseUrl")
61+
MEDIA_PARAMS)
6362
Observable<MwQueryResponse> getMediaListFromSearch(@Query("gsrsearch") String keyword, @Query("gsrlimit") int itemLimit, @QueryMap Map<String, String> continuation);
6463

6564
/**
@@ -69,9 +68,7 @@ public interface MediaInterface {
6968
* @return
7069
*/
7170
@GET("w/api.php?action=query&format=json&formatversion=2" +
72-
"&prop=imageinfo&iiprop=url|extmetadata&iiurlwidth=640" +
73-
"&iiextmetadatafilter=DateTime|Categories|GPSLatitude|GPSLongitude|ImageDescription|DateTimeOriginal" +
74-
"|Artist|LicenseShortName|LicenseUrl")
71+
MEDIA_PARAMS)
7572
Observable<MwQueryResponse> getMedia(@Query("titles") String title);
7673

7774
/**
@@ -82,8 +79,9 @@ public interface MediaInterface {
8279
* @return
8380
*/
8481
@GET("w/api.php?action=query&format=json&formatversion=2&generator=images" +
85-
"&prop=imageinfo&iiprop=url|extmetadata&iiurlwidth=640" +
86-
"&iiextmetadatafilter=DateTime|Categories|GPSLatitude|GPSLongitude|ImageDescription|DateTimeOriginal" +
87-
"|Artist|LicenseShortName|LicenseUrl")
82+
MEDIA_PARAMS)
8883
Observable<MwQueryResponse> getMediaWithGenerator(@Query("titles") String title);
84+
85+
@GET("w/api.php?format=json&action=parse&prop=text")
86+
Observable<MwParseResponse> getPageHtml(@Query("page") String title);
8987
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
package fr.free.nrw.commons.media;
2+
3+
import androidx.annotation.Nullable;
4+
import androidx.annotation.VisibleForTesting;
5+
6+
import org.wikipedia.dataclient.mwapi.MwResponse;
7+
8+
public class MwParseResponse extends MwResponse {
9+
@Nullable
10+
private MwParseResult parse;
11+
12+
@Nullable
13+
public MwParseResult parse() {
14+
return parse;
15+
}
16+
17+
public boolean success() {
18+
return parse != null;
19+
}
20+
21+
@VisibleForTesting
22+
protected void setParse(@Nullable MwParseResult parse) {
23+
this.parse = parse;
24+
}
25+
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
package fr.free.nrw.commons.media;
2+
3+
import androidx.annotation.Nullable;
4+
5+
import com.google.gson.annotations.SerializedName;
6+
7+
public class MwParseResult {
8+
@SuppressWarnings("unused") private int pageid;
9+
@SuppressWarnings("unused") private int index;
10+
private MwParseText text;
11+
12+
public String text() {
13+
return text.text;
14+
}
15+
16+
17+
public class MwParseText{
18+
@SerializedName("*") private String text;
19+
}
20+
}

app/src/main/java/fr/free/nrw/commons/mwapi/ApacheHttpClientMediaWikiApi.java

Lines changed: 0 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44

55
import androidx.annotation.NonNull;
66
import androidx.annotation.Nullable;
7-
import com.google.gson.Gson;
87

98
import org.apache.http.conn.ClientConnectionManager;
109
import org.apache.http.conn.scheme.PlainSocketFactory;
@@ -26,10 +25,6 @@
2625
import fr.free.nrw.commons.BuildConfig;
2726
import fr.free.nrw.commons.CommonsApplication;
2827

29-
import fr.free.nrw.commons.BuildConfig;
30-
import fr.free.nrw.commons.CommonsApplication;
31-
32-
import io.reactivex.Single;
3328
import timber.log.Timber;
3429

3530
/**
@@ -54,35 +49,6 @@ public ApacheHttpClientMediaWikiApi(String apiURL) {
5449
api = new CustomMwApi(apiURL, httpClient);
5550
}
5651

57-
@Override
58-
public Single<String> parseWikicode(String source) {
59-
return Single.fromCallable(() -> api.action("flow-parsoid-utils")
60-
.param("from", "wikitext")
61-
.param("to", "html")
62-
.param("content", source)
63-
.param("title", "Main_page")
64-
.get()
65-
.getString("/api/flow-parsoid-utils/@content"));
66-
}
67-
68-
@Override
69-
@NonNull
70-
public Single<MediaResult> fetchMediaByFilename(String filename) {
71-
return Single.fromCallable(() -> {
72-
CustomApiResult apiResult = api.action("query")
73-
.param("prop", "revisions")
74-
.param("titles", filename)
75-
.param("rvprop", "content")
76-
.param("rvlimit", 1)
77-
.param("rvgeneratexml", 1)
78-
.get();
79-
80-
return new MediaResult(
81-
apiResult.getString("/api/query/pages/page/revisions/rev"),
82-
apiResult.getString("/api/query/pages/page/revisions/rev/@parsetree"));
83-
});
84-
}
85-
8652
/**
8753
* Checks to see if a user is currently blocked from Commons
8854
*

app/src/main/java/fr/free/nrw/commons/mwapi/MediaResult.java

Lines changed: 0 additions & 33 deletions
This file was deleted.

app/src/main/java/fr/free/nrw/commons/mwapi/MediaWikiApi.java

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,6 @@
99

1010
public interface MediaWikiApi {
1111

12-
Single<String> parseWikicode(String source);
13-
14-
@NonNull
15-
Single<MediaResult> fetchMediaByFilename(String filename);
16-
1712
boolean isUserBlockedFromCommons();
1813

1914
void logout();

app/src/main/java/fr/free/nrw/commons/review/ReviewHelper.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ public ReviewHelper(MediaClient mediaClient, ReviewInterface reviewInterface) {
3535
}
3636

3737
/**
38-
* Fetches recent changes from MediaWiki AP
38+
* Fetches recent changes from MediaWiki API
3939
* Calls the API to get 10 changes in the last 1 hour
4040
* Earlier we were getting changes for the last 30 days but as the API returns just 10 results
4141
* its best to fetch for just last 1 hour.

app/src/test/kotlin/fr/free/nrw/commons/MediaDataExtractorTest.kt

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
11
package fr.free.nrw.commons
22

33
import fr.free.nrw.commons.media.MediaClient
4-
import fr.free.nrw.commons.mwapi.MediaResult
54
import fr.free.nrw.commons.mwapi.MediaWikiApi
6-
import fr.free.nrw.commons.mwapi.OkHttpJsonApiClient
75
import io.reactivex.Single
86
import org.junit.Assert.assertTrue
97
import org.junit.Before
@@ -49,13 +47,8 @@ class MediaDataExtractorTest {
4947
`when`(mediaClient?.checkPageExistsUsingTitle(ArgumentMatchers.anyString()))
5048
.thenReturn(Single.just(true))
5149

52-
val mediaResult = mock(MediaResult::class.java)
53-
`when`(mediaResult.wikiSource).thenReturn("some wiki source")
54-
`when`(mwApi?.fetchMediaByFilename(ArgumentMatchers.anyString()))
55-
.thenReturn(Single.just(mediaResult))
56-
57-
`when`(mwApi?.parseWikicode(ArgumentMatchers.anyString()))
58-
.thenReturn(Single.just("discussion text"))
50+
`when`(mediaClient?.getPageHtml(ArgumentMatchers.anyString()))
51+
.thenReturn(Single.just("Test"))
5952

6053
val fetchMediaDetails = mediaDataExtractor?.fetchMediaDetails("test.jpg")?.blockingGet()
6154

app/src/test/kotlin/fr/free/nrw/commons/media/MediaClientTest.kt

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,4 +196,30 @@ class MediaClientTest {
196196
assertEquals(media1.filename, "Test")
197197
assertEquals(media2.filename, "Test")
198198
}
199+
200+
@Test
201+
fun getPageHtmlTest() {
202+
val mwParseResult = mock(MwParseResult::class.java)
203+
204+
`when`(mwParseResult.text()).thenReturn("Test")
205+
206+
val mockResponse = MwParseResponse()
207+
mockResponse.setParse(mwParseResult)
208+
209+
`when`(mediaInterface!!.getPageHtml(ArgumentMatchers.anyString()))
210+
.thenReturn(Observable.just(mockResponse))
211+
212+
assertEquals("Test", mediaClient!!.getPageHtml("abcde").blockingGet())
213+
}
214+
215+
@Test
216+
fun getPageHtmlTestNull() {
217+
val mockResponse = MwParseResponse()
218+
mockResponse.setParse(null)
219+
220+
`when`(mediaInterface!!.getPageHtml(ArgumentMatchers.anyString()))
221+
.thenReturn(Observable.just(mockResponse))
222+
223+
assertEquals("", mediaClient!!.getPageHtml("abcde").blockingGet())
224+
}
199225
}

0 commit comments

Comments
 (0)