-
Notifications
You must be signed in to change notification settings - Fork 25
Expand file tree
/
Copy pathserver.py
More file actions
66 lines (46 loc) · 1.79 KB
/
Copy pathserver.py
File metadata and controls
66 lines (46 loc) · 1.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
from __future__ import absolute_import, division, print_function, unicode_literals
import os
import sys
if __name__ == "__main__":
from gevent.server import StreamServer
from mprpc import RPCServer
# Add the parent of this file's directory to sys.path (presumably so that the
# cosrlib / urlserver imports below resolve when this file is run as a script).
# NOTE(review): index -1 inserts *before the last* sys.path entry, it does not
# append to the end - confirm this position is intended.
sys.path.insert(-1, os.path.normpath(os.path.join(__file__, "../../")))
from cosrlib.url import URL
from urlserver.id_generator import make_url_id, make_domain_id
from urlserver.storage import Storage
from urlserver.protos import urlserver_pb2
# Module-level storage handle, opened read-only at import time.
# URLServer.get_metadata() reads from it.
db = Storage(read_only=True)
class URLServer(RPCServer):
    """ RPC server exposing static, per-URL metadata lookups.

        mprpc is used for now because it is simple; moving to gRPC (or a similar
        framework) would let us ship protobufs as-is instead of wrapping them in
        MessagePack a second time.
    """

    def get_ids(self, urls):
        """ Compute the URL ID of every entry in `urls`, preserving input order """
        return [make_url_id(URL(raw)) for raw in urls]

    def get_domain_ids(self, urls):
        """ Compute the domain ID of every entry in `urls`, preserving input order """
        return [make_domain_id(URL(raw)) for raw in urls]

    def get_metadata(self, urls):
        """ Look up stored metadata for each *normalized* URL, preserving input order.

            URLs missing from storage (or every URL, when no database is open) get a
            serialized UrlMetadata message carrying only their ID.
        """
        results = []
        for normalized in urls:
            # Only hit the storage layer when an underlying database is available
            if db.db:
                blob = db.get(normalized)
            else:
                blob = None

            # Unknown URL: callers still expect an ID, so build a minimal record
            if blob is None:
                placeholder = urlserver_pb2.UrlMetadata()
                placeholder.id = make_url_id(URL(normalized))
                blob = placeholder.SerializeToString()

            results.append(blob)
        return results
if __name__ == "__main__":
    # Script entry point: serve the URLServer RPC handler on every interface,
    # TCP port 9702, until the process is stopped.
    listen_address = ("0.0.0.0", 9702)
    rpc_handler = URLServer()
    server = StreamServer(listen_address, rpc_handler)
    server.serve_forever()