@@ -36,7 +36,7 @@ def clean_lang(self, request):
36
36
if not re .match (r'^[a-z]{2}([-_][a-zA-Z]{2})?$' ,
37
37
request_form ['lang' ]):
38
38
del request_form ['lang' ]
39
-
39
+
40
40
def __call__ (self , environ , start_response ):
41
41
request = Request (environ )
42
42
path_info = request .path_info
@@ -54,9 +54,19 @@ def __call__(self, environ, start_response):
54
54
if request .GET :
55
55
new_path_info = '%s?%s' % (
56
56
new_path_info , urllib .urlencode (request .GET ))
57
- redirect = exc .HTTPFound (location = new_path_info )
58
- return request .get_response (redirect )(environ , start_response )
59
-
57
+ # If the URL contains non-ASCII characters, building the redirect raises UnicodeEncodeError.
58
+ # Since such urls are broken, don't redirect. Fall through to
59
+ # the 404.
60
+ # The reason for handling this is that we're seeing (2017) a lot
61
+ # of penetration testing requests of the form
62
+ # /licenses/by-nd/2.0/%EF%BB%BF%EF%BB%BFThe
63
+ try :
64
+ redirect = exc .HTTPFound (location = new_path_info )
65
+ return request .get_response (redirect )(environ ,
66
+ start_response )
67
+ except UnicodeEncodeError , e :
68
+ # Don't send the Found, fall through to the 404
69
+ pass
60
70
# Return a 404
61
71
response = util .generate_404_response (
62
72
request , routing , environ , self .staticdirector )
0 commit comments