Skip to content

Commit 87c74b4

Browse files
committed
add additional updates
1 parent b69d37c commit 87c74b4

File tree

1 file changed

+138
-12
lines changed

1 file changed

+138
-12
lines changed

bin/create_static_site.sh

+138-12
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,14 @@
11
#!/bin/bash
2+
set -o errexit
23
set -o errtrace
34
set -o nounset
45

56

6-
if command -v gsed >/dev/null; then
7+
if command -v gsed >/dev/null
8+
then
79
SED=gsed
8-
elif sed --version >/dev/null; then
10+
elif sed --version >/dev/null
11+
then
912
SED=sed
1013
else
1114
echo 'GNU sed is required. If on macOS install `gnu-sed` via brew.' 1>&2
@@ -19,34 +22,157 @@ function _change_to_repo_dir {
1922

2023

2124
# Refresh docs/ from temp_download/ and write the .nojekyll and CNAME files.
#
# Works relative to the current directory, so the caller must already have
# changed to the directory that contains docs/ and temp_download/.
function _recreate_docs_dir {
    # The copy below needs an existing destination directory
    mkdir -p docs
    # Only the non-hidden entries of docs/ are removed: the glob does not
    # match dotfiles
    rm -rf docs/*
    cp -a temp_download/* docs/
    touch docs/.nojekyll
    # printf (not echo) so that CNAME is written without a trailing newline
    printf 'dev-stateof.creativecommons.org' > docs/CNAME
}
2730

2831

29-
function _convert_urls_to_absolute_paths {
30-
# Non-escaped URLs
31-
for _pattern in 'http://stateof\.creativecommons\.org/' \
32-
'https://stateof\.creativecommons\.org/'
32+
# Delete WordPress- and stats-related lines from every HTML file under docs/.
#
# Globals:
#   SED (read) - sed command to run; must support --in-place
# Outputs:
#   A highlighted heading line and a trailing blank line on stdout
function _remove_lines_from_html_files {
    local _file
    printf "\e[1m\e[7m %-80s\e[0m\n" 'Remove unhelpful lines from HTML files'
    # Read NUL-delimited paths: word-splitting the output of find would break
    # any path that contains whitespace or glob characters
    while IFS= read -r -d '' _file
    do
        # 1. Remove link: WordPress Edit URI
        # 2. Remove link: WordPress JSON API
        # 3. Remove link: WordPress RSS
        # 4. Remove link: WordPress shortlink
        # 5. Remove link: WordPress v0 prefetch
        # 6. Remove link: WordPress Windows Live Writer Manifest link
        # 7. Remove meta: generator
        # 8. Remove script: WordPress stats
        # 9. Remove (no)script: stats.creativecommons.org
        "${SED}" \
            -e'/rel="EditURI"/d' \
            -e'/^<link.*stateof\.creativecommons\.org\/wp-json\//d' \
            -e'/type="application\/rss+xml"/d' \
            -e"/rel='shortlink'/d" \
            -e'/href=.https:\/\/v0\.wordpress\.com\//d' \
            -e'/rel="wlwmanifest"/d' \
            -e'/name="generator"/d' \
            -e'/src=.https:\/\/stats\.wp\.com\//d' \
            -e'/src=.https:\/\/stats\.creativecommons\.org\//d' \
            --in-place "${_file}"
    done < <(find docs -type f -name '*.html' -print0)
    echo
}
59+
60+
61+
# Turn the URL-encoded '%3F' that follows known asset names back into a
# literal '?' in every HTML file under docs/.
#
# Globals:
#   SED (read) - sed command to run; must support --regexp-extended and
#                --in-place
# Outputs:
#   A highlighted heading line and a trailing blank line on stdout
function _restore_query_strings_in_html_files {
    local _file
    printf "\e[1m\e[7m %-80s\e[0m\n" 'Restore query strings in HTML files'
    # Read NUL-delimited paths: word-splitting the output of find would break
    # any path that contains whitespace or glob characters
    while IFS= read -r -d '' _file
    do
        # 1. Restore CSS query strings
        # 2. Restore JavaScript query strings
        # 3. Restore TTF query strings
        # 4. Restore woff/woff2 query strings
        #    ('2?' makes the digit optional; '2+' would require it and so
        #    would never match plain .woff)
        # 5. Restore style_dynamic.php query strings
        # 6. Restore style_dynamic_responsive.php query strings
        "${SED}" --regexp-extended \
            -e's#(\.css)%3F(ver=)#\1?\2#g' \
            -e's#(\.js)%3F(ver=)#\1?\2#g' \
            -e's#(\.ttf)%3F#\1?#g' \
            -e's#(\.woff2?)%3F#\1?#g' \
            -e's#(style_dynamic\.php)%3F#\1?#g' \
            -e's#(style_dynamic_responsive\.php)%3F#\1?#g' \
            --in-place "${_file}"
    done < <(find docs -type f -name '*.html' -print0)
    echo
}
82+
83+
# Point protocol-relative i.creativecommons.org URLs at licensebuttons.net in
# every HTML file under docs/2015.
#
# Globals:
#   SED (read) - sed command to run; must support --in-place
# Outputs:
#   A highlighted heading line and a trailing blank line on stdout
function _update_licensebuttons_domain {
    local _file
    printf "\e[1m\e[7m %-80s\e[0m\n" 'Update licensebuttons domain'
    # Read NUL-delimited paths: word-splitting the output of find would break
    # any path that contains whitespace or glob characters
    while IFS= read -r -d '' _file
    do
        "${SED}" \
            -e's#//i\.creativecommons\.org/#//licensebuttons.net/#g' \
            --in-place "${_file}"
    done < <(find docs/2015 -type f -name '*.html' -print0)
    echo
}
93+
94+
95+
# Rewrite URLs that point at stateof.creativecommons.org into absolute paths
# ("/...") in every file under docs/ that contains one.
#
# Handles, in this order: http:// URLs, https:// URLs, protocol-relative
# (//host/) URLs and backslash-escaped https URLs (https:\/\/host\/).
#
# Globals:
#   SED (read) - sed command to run; must support --in-place
# Outputs:
#   A highlighted heading line and a trailing blank line on stdout
function _replace_full_urls_with_absolute_paths {
    local _file _pattern
    local -a _files
    printf "\e[1m\e[7m %-80s\e[0m\n" 'Replace full URLs with absolute paths'
    # Non-escaped URLs: with protocol first, then without protocol
    for _pattern in \
        'http://stateof\.creativecommons\.org/' \
        'https://stateof\.creativecommons\.org/' \
        '//stateof\.creativecommons\.org/'
    do
        # Collect the complete match list before editing anything: grep must
        # not still be walking docs/ while sed creates its in-place temporary
        # files there. NUL-delimited names keep paths with whitespace intact.
        _files=()
        while IFS= read -r -d '' _file
        do
            _files+=("${_file}")
        done < <(grep --files-with-matches --max-count=1 --null \
                    --recursive "${_pattern}" docs)
        # The ${array[@]+...} form is safe for an empty array under nounset
        for _file in ${_files[@]+"${_files[@]}"}
        do
            "${SED}" --in-place -e"s#${_pattern}#/#g" "${_file}"
        done
    done
    # Escaped URL
    _files=()
    while IFS= read -r -d '' _file
    do
        _files+=("${_file}")
    done < <(grep --fixed-strings --files-with-matches --max-count=1 --null \
                --recursive 'https:\/\/stateof.creativecommons.org\/' docs)
    for _file in ${_files[@]+"${_files[@]}"}
    do
        "${SED}" --in-place \
            -e's#https:\\/\\/stateof\.creativecommons\.org\\/#\\/#g' "${_file}"
    done
    echo
}
124+
125+
126+
# Rename files under docs/ whose name contains a '?' so that the name ends
# just before the first '?', and drop the extra .html extension from
# *.woff2.html files.
#
# Files named index.html*, low-bandwidth.html* or *orig are not renamed. When
# a sibling backup exists (the file's path with its last four characters
# replaced by "orig"), the backup's content is kept instead of the file's.
#
# Outputs:
#   A highlighted heading line and a trailing blank line on stdout
function _revert_non_html_conversions {
    local _file _fixed _orig
    local -a _files=()
    printf "\e[1m\e[7m %-80s\e[0m\n" 'Revert non-HTML file conversions'
    # Snapshot the listing before renaming anything. The names are read
    # NUL-delimited because every one of them contains '?', which an unquoted
    # expansion would treat as a glob character.
    while IFS= read -r -d '' _file
    do
        _files+=("${_file}")
    done < <(find docs -type f -name '*\?*' -not -name 'index.html*' \
                -not -name 'low-bandwidth.html*' -not -name '*orig' -print0)
    # The ${array[@]+...} form is safe for an empty array under nounset
    for _file in ${_files[@]+"${_files[@]}"}
    do
        _orig="${_file%????}orig"
        _fixed="${_file%%\?*}"
        if [[ -f "${_orig}" ]]
        then
            # Restore original and remove query strings
            rm -- "${_file}"
            mv -- "${_orig}" "${_fixed}"
        else
            # Remove query strings
            mv -- "${_file}" "${_fixed}"
        fi
    done
    # Remove inaccurate .html file extension
    _files=()
    while IFS= read -r -d '' _file
    do
        _files+=("${_file}")
    done < <(find docs -type f -name '*.woff2.html' -print0)
    for _file in ${_files[@]+"${_files[@]}"}
    do
        mv -- "${_file}" "${_file%.html}"
    done
    echo
}
150+
151+
152+
# Strip trailing spaces and tabs from every line of the .css, .html and .js
# files under docs/.
#
# Globals:
#   SED (read) - sed command to run; must support --in-place and the
#                '\+' / '\t' regex escapes used below
# Outputs:
#   A highlighted heading line and a trailing blank line on stdout
function _cleanup_plaintext_whitespace {
    local _file
    printf "\e[1m\e[7m %-80s\e[0m\n" 'Clean-up whitespace in plaintext files'
    # plaintext files with trailing whitespace
    # (read NUL-delimited: word-splitting the output of find would break any
    # path that contains whitespace or glob characters)
    while IFS= read -r -d '' _file
    do
        "${SED}" --in-place -e's#[ \t]\+$##' "${_file}"
    done < <(find docs -type f \
                \( -name '*.css' -o -name '*.html' -o -name '*.js' \) -print0)
    echo
}
162+
163+
164+
# Delete every regular file under docs/ whose name ends in "orig".
#
# Outputs:
#   A highlighted heading line on stdout
# Returns:
#   The exit status of find
function _cleanup_orig_file_backups {
    printf "\e[1m\e[7m %-80s\e[0m\n" 'Clean-up orig file backups'
    find docs -type f -name '*orig' -delete
}
48168

49169

50170
# Build steps, in order. Each later step edits the docs/ tree that
# _recreate_docs_dir populates.
_change_to_repo_dir # must be called first
_recreate_docs_dir
_remove_lines_from_html_files
_restore_query_strings_in_html_files
_update_licensebuttons_domain
_replace_full_urls_with_absolute_paths
_revert_non_html_conversions
_cleanup_plaintext_whitespace
_cleanup_orig_file_backups

0 commit comments

Comments
 (0)