Skip to content

Commit 6cf42a4

Browse files
committed
Build: Enable typesense scraper
Ref jquery/infrastructure-puppet#33
1 parent 9cd5ba8 commit 6cf42a4

File tree

2 files changed

+85
-0
lines changed

2 files changed

+85
-0
lines changed

.github/workflows/typesense.yaml

+26
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,26 @@
---
# Re-index the jQuery UI sites into Typesense by running the DocSearch
# scraper container against docsearch.config.json from this repository.
name: typesense
on:
  # After a deployment
  push:
    branches:
      - main
  # Or manually
  workflow_dispatch:

jobs:
  typesense:
    name: Update Typesense
    if: ${{ github.repository_owner == 'jquery' }}  # skip on forks
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Docsearch Scraper
        shell: bash
        # Secrets are handed to the step through its environment rather than
        # being expanded into the script text, so their contents are never
        # parsed by bash (no word splitting or metacharacter interpretation).
        env:
          TYPESENSE_API_KEY: ${{ secrets.TYPESENSE_ADMIN_KEY }}
          TYPESENSE_HOST: ${{ secrets.TYPESENSE_HOST }}
          TYPESENSE_PORT: '443'
          TYPESENSE_PROTOCOL: https
        # `-e NAME` with no value forwards NAME from the step environment
        # into the container. CONFIG is the scraper config serialized by jq
        # into a single-line JSON string.
        run: |
          docker run \
            -e TYPESENSE_API_KEY \
            -e TYPESENSE_HOST \
            -e TYPESENSE_PORT \
            -e TYPESENSE_PROTOCOL \
            -e CONFIG="$(jq -r tostring docsearch.config.json)" \
            typesense/docsearch-scraper:0.8.0

docsearch.config.json

+59
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,59 @@
{
  "index_name": "jqueryui_com",
  "start_urls": [
    {
      "url": "https://api.jqueryui.com",
      "selectors_key": "api",
      "page_rank": 20
    },
    {
      "url": "https://jqueryui.com",
      "page_rank": 10
    }
  ],
  "// stop_urls": [
    "// Exclude URLs containing '?' such as /themeroller/?..."
  ],
  "stop_urls": [
    "\\?",
    ".com/category/",
    ".com/resources/",
    ".com\\/\\d\\."
  ],
  "selectors": {
    "default": {
      "lvl0": {
        "selector": "#menu-top .menu-item.current > a",
        "global": true,
        "default_value": "Documentation"
      },
      "lvl1": "#content h1",
      "lvl2": "#content h2",
      "lvl3": "#content h3",
      "lvl4": "#content h4",
      "lvl5": "#content h5",
      "text": "#content p, #content li, #content tr"
    },
    "api": {
      "lvl0": {
        "selector": "#categories .cat-item.current-cat > a",
        "global": true,
        "default_value": "API"
      },
      "lvl1": "#content h1",
      "lvl2": "#content h2, #content h4.name",
      "lvl3": "#content h3, #content h4:not(.name)",
      "lvl4": "#content h5, #content strong:first-child",
      "text": ".entry-content p, .entry-content li"
    }
  },
  "custom_settings": {
    "token_separators": [
      "_",
      "-",
      "."
    ]
  },
  "selectors_exclude": [
    "header ~ article",
    ".returns",
    ".version-details",
    ".section-title",
    ".icon-link.toc-link",
    "[class^=toclevel]",
    "#toctitle",
    ".desc strong:first-child",
    "#quick-nav header h2"
  ],
  "min_indexed_level": 2,
  "scrape_start_urls": false
}

0 commit comments

Comments
 (0)