-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathurls.py
More file actions
100 lines (79 loc) · 3.21 KB
/
Copy pathurls.py
File metadata and controls
100 lines (79 loc) · 3.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
from numpy import median, percentile, mean
from time import clock
import os
import gc
import tabulate
import sys
import urlparse
import urlparse2
from uritools import urisplit as uritools_urisplit
from uritools import urijoin as uritools_urijoin
from yurl import URL as yurl_url
import pygurl
# Disabled benchmarks
# import slimurl
# import urlparse3
import cyuri
sys.path.insert(-1, os.path.dirname(os.path.dirname(__file__)))
import urlparse4
# The garbage collector is switched off for the whole run (presumably so that
# collection pauses do not end up inside the timed calls).
gc.disable()

# How many times the complete URL list is replayed for each benchmark.
REPEATS = 10

# Every line of every file found in tests/urls/ (path relative to the current
# working directory). Lines keep their trailing newline at this point.
URLS = []
for filename in os.listdir("tests/urls/"):
    with open("tests/urls/%s" % filename) as handle:
        URLS.extend(handle.readlines())

# Rows of the final results table; filled by benchmark() and title().
data = []
def benchmark(name, func, debug=False):
    """Time ``func`` on every entry of ``URLS``, repeated ``REPEATS`` times.

    Each call is timed on its own with ``clock()``; only the call to ``func``
    sits between the two clock readings. Once all calls are done, a row
    ``[name, sum, mean, median, 90th percentile]`` of the collected durations
    is printed and appended to the module-level ``data`` list.

    Args:
        name: Label stored in the first column of the result row.
        func: Callable invoked with one whitespace-stripped URL string; its
            return value is ignored.
        debug: When true, print each URL just before it is timed.
    """
    samples = []
    for _ in range(REPEATS):
        for line in URLS:
            target = line.strip()
            if debug:
                print(target)
            started = clock()
            func(target)
            samples.append(clock() - started)

    summary = [
        name,
        sum(samples),
        mean(samples),
        median(samples),
        percentile(samples, 90),
    ]
    print(summary)
    data.append(summary)
def title(name):
    """Append a section heading for ``name`` to the module-level ``data`` table.

    Three five-column rows are added, in this order: an empty spacer row, a
    row whose first cell is ``"<name>:"``, and a row of ``"----"`` cells.
    """
    data.extend([
        ["", "", "", "", ""],
        ["%s:" % name, "", "", "", ""],
        ["----", "----", "----", "----", "----"],
    ])
def _run_suite(heading, cases):
    """Add a section heading, then benchmark each (label, callable) pair in order."""
    title(heading)
    for label, target in cases:
        benchmark(label, target)


# Segfault: https://github.com/mitghi/cyuri/issues/1
cyuri_parser = cyuri.uriparser()

# Every case stays wrapped in a lambda so that each library is measured
# through the same kind of call.
_run_suite("urlsplit", [
    ("urlparse4", lambda url: urlparse4.urlsplit(url)),
    ("pygurl", lambda url: pygurl.ParseStandard(url)),
    ("uritools", lambda url: uritools_urisplit(url)),
    ("yurl", lambda url: yurl_url(url)),
    ("urlparse2", lambda url: urlparse2.urlsplit(url)),
    ("urlparse", lambda url: urlparse.urlsplit(url)),
    ("cyuri", lambda url: cyuri_parser.components(url)),
])

_run_suite("urljoin_sibling", [
    ("urlparse4", lambda url: urlparse4.urljoin(url, "sibling.html?q=1#e=b")),
    ("pygurl", lambda url: pygurl.URL(url).Resolve("sibling.html?q=1#e=b")),
    ("uritools", lambda url: uritools_urijoin(url, "sibling.html?q=1#e=b")),
    ("yurl", lambda url: yurl_url(url) + yurl_url("sibling.html?q=1#e=b")),
    ("urlparse2", lambda url: urlparse2.urljoin(url, "sibling.html?q=1#e=b")),
    ("urlparse", lambda url: urlparse.urljoin(url, "sibling.html?q=1#e=b")),
    ("cyuri", lambda url: cyuri_parser.join(url, "sibling.html?q=1#e=b")),
])

# The hostname numbers are not fully representative: some libraries can fetch
# the host directly without parsing the rest of the URL. They may still be
# useful to some readers.
_run_suite("hostname", [
    ("urlparse4", lambda url: urlparse4.urlsplit(url).hostname),
    ("pygurl", lambda url: pygurl.URL(url).host()),
    ("uritools", lambda url: uritools_urisplit(url).host),
    ("yurl", lambda url: yurl_url(url).host),
    ("urlparse2", lambda url: urlparse2.urlsplit(url).hostname),
    ("urlparse", lambda url: urlparse.urlsplit(url).hostname),
    ("cyuri", lambda url: cyuri_parser.components(url)["host"]),
])

# Disabled: slimurl is very slow.
# benchmark("slimurl", lambda url: slimurl.URL(url))

# Disabled: urlparse3 breaks on simple URLs such as
# http://1-14th.com/timeline-4-66T.htm
# benchmark("urlparse3_urlsplit", lambda url: urlparse3.parse_url(url))
# Final report: a caption followed by the collected rows rendered as a table,
# each separated by an empty line. Single-argument print(...) is used so the
# statements produce the same output under the Python 2 print statement.
caption = "Benchmark results on %s URLs x %s times, in seconds:" % (len(URLS), REPEATS)
table = tabulate.tabulate(data, headers=["Name", "Sum", "Mean", "Median", "90%"])

print("")
print(caption)
print("")
print(table)
print("")