-
Notifications
You must be signed in to change notification settings - Fork 25
Expand file tree
/
Copy pathserver.py
More file actions
117 lines (83 loc) · 2.89 KB
/
Copy pathserver.py
File metadata and controls
117 lines (83 loc) · 2.89 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# pylint: disable=wrong-import-order
from __future__ import absolute_import, division, print_function, unicode_literals
from gevent.wsgi import WSGIServer
from flask import Flask, request, render_template
import os
import sys
import json
import requests
sys.path.insert(0, os.getcwd())
from cosrlib.document import load_document_type
from cosrlib.config import config
from cosrlib.searcher import Searcher
from cosrlib.indexer import Indexer
# Absolute directory containing this file (symlinks resolved by realpath);
# used to locate the static assets and templates shipped alongside it.
CURRENT_DIRECTORY = os.path.dirname(os.path.realpath(__file__))

# Flask application named "explainer", serving static files and Jinja
# templates from the "static" and "templates" folders next to this file.
app = Flask(
    "explainer",
    static_folder=os.path.join(CURRENT_DIRECTORY, "static"),
    template_folder=os.path.join(CURRENT_DIRECTORY, "templates")
)

# Module-level Indexer and Searcher shared by every request handler below.
# NOTE(review): connect() runs at import time; what it connects to is defined
# in cosrlib and not visible here -- presumably the index backends, confirm.
indexer = Indexer()
indexer.connect()

searcher = Searcher()
searcher.connect()
@app.route('/')
def route_search():
    """ Serve the site root: the page used for debugging searches """
    empty_config = {}
    return render_template("search.html", config=empty_config)
@app.route('/url')
def route_url():
    """ Serve the page used for debugging how a URL gets parsed """
    empty_config = {}
    return render_template("url.html", config=empty_config)
@app.route('/api/searchdebug')
def debug_search():
    """ Run a search with explanations enabled and return the results as a JSON string.

    Reads two query-string parameters: "q" (passed to the searcher as the
    query) and "g" (the language, replaced by "en" when missing or empty).
    """
    params = request.args

    terms = params.get("q")
    language = params.get("g")
    if not language:
        language = "en"

    hits = searcher.search(terms, lang=language, explain=True, fetch_docs=True)
    return json.dumps(hits)
# Upper bound, in seconds, on how long fetching a remote page may block.
# Without it a stalled remote host would hang the request indefinitely.
_URL_FETCH_TIMEOUT = 10


def _with_http_scheme(url):
    """ Return `url` as-is when it already has an http(s) scheme, else prefix it with "http://" """
    # Test for the full "scheme://" prefix, case-insensitively: a bare
    # startswith("http") wrongly treats hosts such as "httpbin.org" as
    # already having a scheme.
    if url.lower().startswith(("http://", "https://")):
        return url
    return "http://" + url


@app.route('/api/urldebug')
def debug_url():
    """ API route for URL debug

    Reads the "url" query-string parameter, loads the content (from a local
    file under tests/ when ENV is "local", otherwise over HTTP), runs it
    through the indexer's parser and ranker, and returns a JSON string
    describing the parsed document.

    Returns a 400 response with a JSON error body when "url" is missing or
    empty. Errors raised while fetching or parsing are not caught here.
    """

    # TODO: have a quota per ip on this API to prevent abuse

    url = request.args.get("url")

    # Guard against a missing parameter, which would otherwise fail with an
    # AttributeError on None.startswith below.
    if not url:
        return json.dumps({"error": "Missing required 'url' parameter"}), 400

    # Special case for local files
    if url.startswith("tests/") and config["ENV"] == "local":
        with open(url, "rb") as f:
            cnt = f.read()
        headers = {}

    else:
        url = _with_http_scheme(url)

        req = requests.get(url, timeout=_URL_FETCH_TIMEOUT)
        cnt = req.content
        headers = dict(req.headers)

    doc = load_document_type("html", cnt, url=str(url), headers=headers)

    parsed = indexer.parse_document(doc)

    global_rank, ranking_signals = indexer.ranker.get_global_document_rank(doc, parsed["url_metadata"])

    # URL class is not serializable
    links = [{
        "href": link["href"].url,
        "words": link.get("words")
    } for link in doc.get_hyperlinks()]

    ret = {
        "url": parsed["url"].url,
        "word_groups": doc.get_word_groups(),
        "rank": global_rank,
        "title_raw": doc.get_title(),
        "title": parsed["title_formatted"],
        "summary": parsed["summary_formatted"],
        "langs": parsed["langs"],
        "links": links,
        "ranking_signals": ranking_signals
    }

    return json.dumps(ret)
def main():
    """ Start the explainer web server and block serving requests.

    Turns on Flask debug mode when ENV is "local", prints the listening
    address, then serves the app with a gevent WSGIServer bound to the
    "host:port" value of the EXPLAINER config entry.
    """
    if config["ENV"] == "local":
        app.debug = True

    address = config["EXPLAINER"]
    print("Explainer listening on http://%s" % address)
    # Flush so the banner shows up immediately even when stdout is buffered
    sys.stdout.flush()

    pieces = address.split(":")
    listener = (pieces[0], int(pieces[1]))

    WSGIServer(listener, app).serve_forever()
# Start the server only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()