@@ -36,37 +36,35 @@ class CategoriesModel
36
36
* @return true if the category name is judged spammy or irrelevant and should be skipped, false otherwise
37
37
*/
38
38
fun isSpammyCategory(item: String): Boolean {
    // Decides whether a category name should be skipped. Checks run cheapest-first and
    // return as soon as one of them fires:
    //   1. maintenance keywords ("needing", "taken on"),
    //   2. an embedded numeric date,
    //   3. a 4-digit number with no 20XX year anywhere in the name.

    // Always skip irrelevant categories such as
    // Media_needing_categories_as_of_16_June_2017 (Issue #750).
    // Plain substring tests are used instead of matches(".*needing.*") because `.` does not
    // match line terminators, so the full-match form misses names containing one.
    if ("needing" in item || "taken on" in item) {
        return true
    }

    // Checks for numeric dates written with one of the separators [. / -]:
    //   dd/mm/yyyy or dd/mm/yy
    //   yyyy/mm/dd or yy/mm/dd
    //   yyyy/mm    or yy/mm
    //   mm/yyyy    or mm/yy
    // For `yy` it is assumed that 20XX is implicit.
    val numericDate =
        """\d{1,2}[-/.]\d{1,2}[-/.]\d{2,4}|\d{2,4}[-/.]\d{1,2}[-/.]\d{1,2}|\d{2,4}[-/.]\d{1,2}|\d{1,2}[-/.]\d{2,4}"""
            .toRegex()
    if (numericDate.containsMatchIn(item)) {
        return true
    }

    // Finally, if the name contains a 4-digit number it must include a year in the 20XX range;
    // e.g. "1920s in France" is skipped while "2020s in France" is kept.
    val hasYear = "\\d{4}".toRegex().containsMatchIn(item)
    val hasValidYear = "20[0-9]{2}".toRegex().containsMatchIn(item)
    return hasYear && !hasValidYear
}
71
69
72
70
/* *
0 commit comments