Skip to content

Commit 5626c40

Browse files
committed
quote url and use linkSite to improve accuracy
1 parent 4c6dbd7 commit 5626c40

File tree

1 file changed

+10
-2
lines changed

1 file changed

+10
-2
lines changed

scripts/1-fetch/gcs_fetch.py

+10-2
Original file line number | Diff line number | Diff line change
@@ -203,8 +203,16 @@ def query_gcs(args, service, last_completed_plan_index, plan):
203203
for plan_row in plan[start:stop]: # noqa: E203
204204
index = plan.index(plan_row) + 1
205205
query_info = f"index: {index}, tool: {plan_row['TOOL_IDENTIFIER']}"
206-
encoded_tool_url = urllib.parse.quote(plan_row["TOOL_URL"], safe=":/")
207-
query_params = {"cx": GCS_CX, "q": encoded_tool_url}
206+
# Note that the URL is quoted, which improves accuracy
207+
# https://blog.google/products/search/how-were-improving-search-results-when-you-use-quotes/
208+
encoded_tool_url = urllib.parse.quote(
209+
f'"{plan_row["TOOL_URL"]}"', safe=":/"
210+
)
211+
query_params = {
212+
"cx": GCS_CX,
213+
"linkSite": plan_row["TOOL_URL"].lstrip("/"),
214+
"q": encoded_tool_url,
215+
}
208216
if plan_row["COUNTRY"]:
209217
query_info = f"{query_info}, country: {plan_row['COUNTRY']}"
210218
query_params["cr"] = plan_row["CR"]

0 commit comments

Comments
 (0)