1
1
#! /bin/bash
2
+ set -o errexit
2
3
set -o errtrace
3
4
set -o nounset
4
5
5
6
6
- if command -v gsed > /dev/null; then
7
+ if command -v gsed > /dev/null
8
+ then
7
9
SED=gsed
8
- elif sed --version > /dev/null; then
10
+ elif sed --version > /dev/null
11
+ then
9
12
SED=sed
10
13
else
11
14
echo ' GNU sed is required. If on macOS install `gnu-sed` via brew.' 1>&2
@@ -19,34 +22,157 @@ function _change_to_repo_dir {
19
22
20
23
21
24
# Rebuild the contents of docs/ from temp_download/.
#
# Works on paths relative to the current directory. Empties docs/ (the
# directory itself is kept), copies every non-hidden entry of temp_download/
# into it, then writes the .nojekyll marker and the CNAME file.
function _recreate_docs_dir {
    # A missing docs/ would make the copy below fail
    mkdir -p docs
    # The glob must stay attached to the path: "docs/ *" would also delete
    # everything in the current directory
    rm -rf -- docs/*
    cp -a -- temp_download/* docs/
    touch docs/.nojekyll
    # No trailing newline is written
    printf 'dev-stateof.creativecommons.org' > docs/CNAME
}
27
30
28
31
29
- function _convert_urls_to_absolute_paths {
30
- # Non-escaped URLs
31
- for _pattern in ' http://stateof\.creativecommons\.org/' \
32
- ' https://stateof\.creativecommons\.org/'
32
# Delete unwanted lines from every *.html file below docs/.
#
# Globals: SED (read) - name of the GNU sed executable
# Outputs: a highlighted banner and a blank line on stdout
function _remove_lines_from_html_files {
    local _file
    local -a _files=()
    printf "\e[1m\e[7m %-80s\e[0m\n" 'Remove unhelpful lines from HTML files'
    # Build the complete NUL-delimited file list before editing anything, so
    # names containing whitespace or glob characters stay intact
    while IFS= read -r -d '' _file
    do
        _files+=("${_file}")
    done < <(find docs -type f -name '*.html' -print0)
    # The ${arr[@]+...} form keeps an empty list from tripping `nounset`
    for _file in ${_files[@]+"${_files[@]}"}
    do
        # 1. Remove link: WordPress Edit URI
        # 2. Remove link: WordPress JSON API
        # 3. Remove link: WordPress RSS
        # 4. Remove link: WordPress shortlink
        # 5. Remove link: WordPress v0 prefetch
        # 6. Remove link: WordPress Windows Live Writer Manifest link
        # 7. Remove meta: generator
        # 8. Remove script: WordPress stats
        # 9. Remove (no)script: stats.creativecommons.org
        "${SED}" \
            -e'/rel="EditURI"/d' \
            -e'/^<link.*stateof\.creativecommons\.org\/wp-json\//d' \
            -e'/type="application\/rss+xml"/d' \
            -e"/rel='shortlink'/d" \
            -e'/href=.https:\/\/v0\.wordpress\.com\//d' \
            -e'/rel="wlwmanifest"/d' \
            -e'/name="generator"/d' \
            -e'/src=.https:\/\/stats\.wp\.com\//d' \
            -e'/src=.https:\/\/stats\.creativecommons\.org\//d' \
            --in-place "${_file}"
    done
    echo
}
59
+
60
+
61
# Turn percent-encoded query-string markers (%3F) back into "?" for asset
# references in every *.html file below docs/.
#
# Globals: SED (read) - name of the GNU sed executable
# Outputs: a highlighted banner and a blank line on stdout
function _restore_query_strings_in_html_files {
    local _file
    local -a _files=()
    printf "\e[1m\e[7m %-80s\e[0m\n" 'Restore query strings in HTML files'
    # Build the complete NUL-delimited file list before editing anything, so
    # names containing whitespace or glob characters stay intact
    while IFS= read -r -d '' _file
    do
        _files+=("${_file}")
    done < <(find docs -type f -name '*.html' -print0)
    # The ${arr[@]+...} form keeps an empty list from tripping `nounset`
    for _file in ${_files[@]+"${_files[@]}"}
    do
        # 1. Restore CSS query strings
        # 2. Restore JavaScript query strings
        # 3. Restore TTF query strings
        # 4. Restore woff/woff2 query strings ("2?" makes the digit optional;
        #    "2+" would require it and skip plain .woff)
        # 5. Restore style_dynamic.php query strings
        # 6. Restore style_dynamic_responsive.php query strings
        "${SED}" --regexp-extended \
            -e's#(\.css)%3F(ver=)#\1?\2#g' \
            -e's#(\.js)%3F(ver=)#\1?\2#g' \
            -e's#(\.ttf)%3F#\1?#g' \
            -e's#(\.woff2?)%3F#\1?#g' \
            -e's#(style_dynamic\.php)%3F#\1?#g' \
            -e's#(style_dynamic_responsive\.php)%3F#\1?#g' \
            --in-place "${_file}"
    done
    echo
}
82
+
83
# Point protocol-relative //i.creativecommons.org/ references at
# //licensebuttons.net/ in every *.html file below docs/2015.
#
# Globals: SED (read) - name of the GNU sed executable
# Outputs: a highlighted banner and a blank line on stdout
function _update_licensebuttons_domain {
    local _file
    local -a _files=()
    printf "\e[1m\e[7m %-80s\e[0m\n" 'Update licensebuttons domain'
    # Build the complete NUL-delimited file list before editing anything, so
    # names containing whitespace or glob characters stay intact
    while IFS= read -r -d '' _file
    do
        _files+=("${_file}")
    done < <(find docs/2015 -type f -name '*.html' -print0)
    # The ${arr[@]+...} form keeps an empty list from tripping `nounset`
    for _file in ${_files[@]+"${_files[@]}"}
    do
        "${SED}" \
            -e's#//i\.creativecommons\.org/#//licensebuttons.net/#g' \
            --in-place "${_file}"
    done
    echo
}
93
+
94
+
95
# Rewrite references to the stateof.creativecommons.org origin as absolute
# paths ("/...") in every file below docs/ that contains one.
#
# Handles, in this order: http:// URLs, https:// URLs, protocol-relative
# // URLs, and finally the backslash-escaped https:\/\/ form (which is
# replaced by an escaped "\/").
#
# Globals: SED (read) - name of the GNU sed executable
# Outputs: a highlighted banner and a blank line on stdout
function _replace_full_urls_with_absolute_paths {
    local _pattern _file
    local -a _files
    printf "\e[1m\e[7m %-80s\e[0m\n" 'Replace full URLs with absolute paths'
    # Non-escaped URLs: with protocol first, then without protocol. Each
    # pattern is used both as the grep expression and the sed expression.
    for _pattern in \
        'http://stateof\.creativecommons\.org/' \
        'https://stateof\.creativecommons\.org/' \
        '//stateof\.creativecommons\.org/'
    do
        # Collect matching files (NUL-delimited) before editing any of them
        _files=()
        while IFS= read -r -d '' _file
        do
            _files+=("${_file}")
        done < <(grep --files-with-matches --max-count=1 --null \
                    --recursive "${_pattern}" docs)
        # The ${arr[@]+...} form keeps an empty list from tripping `nounset`
        for _file in ${_files[@]+"${_files[@]}"}
        do
            "${SED}" --in-place -e"s#${_pattern}#/#g" "${_file}"
        done
    done
    # Escaped URL
    _files=()
    while IFS= read -r -d '' _file
    do
        _files+=("${_file}")
    done < <(grep --fixed-strings --files-with-matches --max-count=1 --null \
                --recursive 'https:\/\/stateof.creativecommons.org\/' docs)
    for _file in ${_files[@]+"${_files[@]}"}
    do
        "${SED}" --in-place \
            -e's#https:\\/\\/stateof\.creativecommons\.org\\/#\\/#g' "${_file}"
    done
    echo
}
124
+
125
+
126
# Give files below docs/ whose names contain a "?" (query string) clean
# names, and drop the extra .html suffix from *.woff2.html files.
#
# Files named index.html*, low-bandwidth.html* or *orig are not renamed by
# the first pass. For each remaining file the name is cut at the first "?";
# if a sibling backup exists (file name minus its last four characters, plus
# "orig") the backup replaces the file under the clean name.
# NOTE(review): the four-character/"orig" rule presumably mirrors how the
# download tool names its backups - confirm against the download step.
#
# Outputs: a highlighted banner and a blank line on stdout
function _revert_non_html_conversions {
    local _file _orig _fixed
    local -a _files=()
    printf "\e[1m\e[7m %-80s\e[0m\n" 'Revert non-HTML file conversions'
    # Snapshot the list (NUL-delimited) before renaming anything; unquoted
    # $(find) output would be glob-expanded on the very "?" being matched
    while IFS= read -r -d '' _file
    do
        _files+=("${_file}")
    done < <(find docs -type f -name '*\?*' -not -name 'index.html*' \
                -not -name 'low-bandwidth.html*' -not -name '*orig' -print0)
    # The ${arr[@]+...} form keeps an empty list from tripping `nounset`
    for _file in ${_files[@]+"${_files[@]}"}
    do
        _orig="${_file%????}orig"
        _fixed="${_file%%\?*}"
        if [[ -f "${_orig}" ]]
        then
            # Restore original and remove query strings
            rm "${_file}"
            mv "${_orig}" "${_fixed}"
        else
            # Remove query strings
            mv "${_file}" "${_fixed}"
        fi
    done
    # Remove inaccurate .html file extension
    _files=()
    while IFS= read -r -d '' _file
    do
        _files+=("${_file}")
    done < <(find docs -type f -name '*.woff2.html' -print0)
    for _file in ${_files[@]+"${_files[@]}"}
    do
        mv "${_file}" "${_file%.html}"
    done
    echo
}
150
+
151
+
152
# Strip trailing spaces and tabs from every line of the *.css, *.html and
# *.js files below docs/.
#
# Globals: SED (read) - name of the GNU sed executable
# Outputs: a highlighted banner and a blank line on stdout
function _cleanup_plaintext_whitespace {
    local _file
    local -a _files=()
    printf "\e[1m\e[7m %-80s\e[0m\n" 'Clean-up whitespace in plaintext files'
    # plaintext files with trailing whitespace; the list is built
    # NUL-delimited up front so unusual file names stay intact
    while IFS= read -r -d '' _file
    do
        _files+=("${_file}")
    done < <(find docs -type f \
                \( -name '*.css' -o -name '*.html' -o -name '*.js' \) -print0)
    # The ${arr[@]+...} form keeps an empty list from tripping `nounset`
    for _file in ${_files[@]+"${_files[@]}"}
    do
        "${SED}" --in-place -e's#[ \t]\+$##' "${_file}"
    done
    echo
}
162
+
163
+
164
# Delete every regular file below docs/ whose name ends in "orig".
#
# Outputs: a highlighted banner and a blank line on stdout
function _cleanup_orig_file_backups {
    printf "\e[1m\e[7m %-80s\e[0m\n" 'Clean-up orig file backups'
    # The pattern must not start with a space, or nothing would ever match
    find docs -type f -name '*orig' -delete
    # Blank separator line, as printed by the other steps
    echo
}
48
168
49
169
50
170
# Run the steps in order. The rewrite steps operate on the docs/ tree that
# _recreate_docs_dir populates; backup clean-up runs last because
# _revert_non_html_conversions still reads the *orig files.
_change_to_repo_dir     # must be called first
_recreate_docs_dir
_remove_lines_from_html_files
_restore_query_strings_in_html_files
_update_licensebuttons_domain
_replace_full_urls_with_absolute_paths
_revert_non_html_conversions
_cleanup_plaintext_whitespace
_cleanup_orig_file_backups
0 commit comments