forked from jarun/googler
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_googler.py
More file actions
123 lines (87 loc) · 3.59 KB
/
Copy pathtest_googler.py
File metadata and controls
123 lines (87 loc) · 3.59 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import json
import os
import pathlib
import re
import subprocess
import pytest
# Repository root: two directory levels above this test module.
ROOT = pathlib.Path(__file__).parent.parent
# Path of the googler script under test.
GOOGLER = ROOT.joinpath("googler")
# Extra command-line options taken from the environment, for testing
# environments that need something like --ipv4 or --proxy to connect.
# The value is split on whitespace; unset or empty yields an empty list.
PRESET_OPTIONS = os.environ.get("GOOGLER_PRESET_OPTIONS", "").split()
class GooglerResults:
    """Parsed JSON results of one googler invocation, plus assertion helpers.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self, argv):
        """Run googler with *argv* and load the JSON it prints.

        The command line is the googler script, then PRESET_OPTIONS, then
        ``--debug --json``, then *argv*.

        Raises:
            subprocess.CalledProcessError: if googler exits with a non-zero
                status.
            AssertionError: if the parsed output is empty.
        """
        # Arguments this result set was produced with.
        self.argv = argv
        json_output = subprocess.check_output(
            [str(GOOGLER), *PRESET_OPTIONS, "--debug", "--json", *argv]
        ).decode("utf-8")
        # Decoded JSON output of googler; asserted non-empty below.
        self.results = json.loads(json_output)
        assert self.results, "no results"

    def all_should(self, predicate):
        """Assert that *predicate* holds for every result."""
        # Using a loop for better error reporting.
        for result in self.results:
            assert predicate(result)

    def some_should(self, predicate):
        """Assert that *predicate* holds for at least one result."""
        # The outcome of any() must be asserted; merely evaluating it would
        # let the check pass even when no result satisfies the predicate.
        assert any(
            predicate(result) for result in self.results
        ), "no result satisfied the predicate"
# Short alias for GooglerResults; the tests below construct result sets
# through this name.
GR = GooglerResults
@pytest.mark.parametrize("query", ["english", "中文"])
def test_default_search(query):
    """Default search: every result is well-formed, some report matches."""

    def have_url_title_and_abstract(result):
        # "url" and "title" must be truthy; "abstract" only needs to be
        # present as a key.
        if not result.get("url"):
            return False
        if not result.get("title"):
            return False
        return "abstract" in result

    def have_matched_keywords(result):
        matches = result.get("matches")
        return True if matches else False

    results = GR([query])
    results.all_should(have_url_title_and_abstract)
    results.some_should(have_matched_keywords)
@pytest.mark.skip(reason="Google News format has changed")
def test_news_search():
    """News search: all results carry metadata, some mention a relative time."""
    time_pattern = re.compile(r"(hour|days)? ago")

    def have_metadata(result):
        return True if result.get("metadata") else False

    def have_time_info_in_metadata(result):
        metadata = result.get("metadata", "")
        return time_pattern.search(metadata) is not None

    news = GR(["--news", "--lang=en", "google"])
    news.all_should(have_metadata)
    news.some_should(have_time_info_in_metadata)
def test_videos_search():
    """Video search: some result is a YouTube URL, some name an uploader."""

    def be_from_youtube(result):
        # The dots are escaped so that only the literal host youtube.com
        # (optionally prefixed with www.) is accepted.
        return (
            re.match(r"https://(www\.)?youtube\.com/", result["url"]) is not None
        )

    def have_uploader_in_metadata(result):
        return "Uploaded by" in result.get("metadata", "")

    gr = GR(["--videos", "--lang=en", "olympics youtube"])
    gr.some_should(be_from_youtube)
    gr.some_should(have_uploader_in_metadata)
def test_site_search():
    """Site-restricted search via --site and via the site: query keyword."""

    def be_from_wikipedia(result):
        url = result["url"]
        return url.startswith("https://en.wikipedia.org")

    # Both spellings of the restriction must yield only Wikipedia results.
    invocations = (
        ["--site=en.wikipedia.org", "google"],
        ["site:en.wikipedia.org google"],
    )
    for argv in invocations:
        GR(argv).all_should(be_from_wikipedia)
@pytest.mark.parametrize("tld", ["in", "de"])
def test_tld_option(tld):
    """Smoke test for --tld: constructing the results must succeed."""
    # Just a lame test to make sure there are results.
    argv = ["--tld", tld, "google"]
    GR(argv)
def test_exact_option():
    """--exact: some result contains the query in its title or abstract."""
    needle = "gogole"

    def have_gogole_in_title_or_abstract(result):
        # Case-insensitive check; the title is examined before the abstract.
        return any(
            needle in result[field].lower() for field in ("title", "abstract")
        )

    exact = GR(["--exact", "gogole"])
    exact.some_should(have_gogole_in_title_or_abstract)
def test_time_option():
    """--time=y1: some result's metadata mentions hours, days or a year."""
    time_pattern = re.compile(r"hours?|days?|(?P<year>\b\d{4}\b)")

    def have_time_in_metadata(result):
        metadata = result.get("metadata", "")
        found = time_pattern.search(metadata)
        return found is not None

    recent = GR(["--time=y1", "--lang=en", "google"])
    recent.some_should(have_time_in_metadata)
def test_from_to_options():
    """--from/--to: some result's metadata mentions the year 2019."""

    def have_year_2019_in_metadata(result):
        metadata = result.get("metadata", "")
        return "2019" in metadata

    argv = ["--from=01/01/2019", "--to=12/31/2019", "--lang=en", "google"]
    ranged = GR(argv)
    # One would expect all results to have 2019 in metadata, but
    # sometimes some results just don't have that line for whatever reason:
    # https://github.com/zmwangx/googler/runs/704110651?check_suite_focus=true
    ranged.some_should(have_year_2019_in_metadata)