Skip to content

Commit 960e2bb

Browse files
authored
Merge pull request #133 from creativecommons/workflows-update
add bot info to workflows
2 parents af0a401 + b84fe81 commit 960e2bb

35 files changed

+180
-85
lines changed

.github/workflows/fetch.yml

+13-2
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@ name: Fetch Data
22

33
on:
44
schedule:
5-
# 1:15am onwards, days 1-20, first month of each quarter
6-
- cron: '15 1,5,9,13,17,21,23 1-20 1,4,7,10 *'
5+
# at 01:15 UTC on days 1-20 of the first month of each quarter
6+
- cron: '15 1 1-20 1,4,7,10 *'
77
workflow_dispatch:
88

99
jobs:
@@ -14,9 +14,20 @@ jobs:
1414
GCS_DEVELOPER_KEY: ${{ secrets.GCS_DEVELOPER_KEY }}
1515
GCS_CX: ${{ secrets.GCS_CX }}
1616

17+
# CC Technology team members:
18+
# See cc-quantifying-bot GitHub entry in Bitwarden for information on
19+
# the BOT_* secrets (BOT_TOKEN, BOT_NAME, BOT_EMAIL)
20+
1721
steps:
1822
- name: Checkout repository
1923
uses: actions/checkout@v4
24+
with:
25+
token: ${{ secrets.BOT_TOKEN }}
26+
27+
- name: Configure git
28+
run: |
29+
git config user.name "${{ secrets.BOT_NAME }}"
30+
git config user.email "${{ secrets.BOT_EMAIL }}"
2031
2132
- name: Set up Python
2233
uses: actions/setup-python@v5

.github/workflows/process.yml.disabled

+13-2
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,28 @@ name: Process Data
22

33
on:
44
schedule:
5-
# 1:15am onwards, days 1-20, second month of each quarter
6-
- cron: '15 1,5,9,13,17,21,23 1-20 2,5,8,11 *'
5+
# at 01:15 UTC on days 1-20 of the second month of each quarter
6+
- cron: '15 1 1-20 2,5,8,11 *'
77
workflow_dispatch:
88

99
jobs:
1010
process:
1111
runs-on: ubuntu-latest
1212

13+
# CC Technology team members:
14+
# See cc-quantifying-bot GitHub entry in Bitwarden for information on
15+
# the BOT_* secrets (BOT_TOKEN, BOT_NAME, BOT_EMAIL)
16+
1317
steps:
1418
- name: Checkout repository
1519
uses: actions/checkout@v4
20+
with:
21+
token: ${{ secrets.BOT_TOKEN }}
22+
23+
- name: Configure git
24+
run: |
25+
git config user.name "${{ secrets.BOT_NAME }}"
26+
git config user.email "${{ secrets.BOT_EMAIL }}"
1627

1728
- name: Set up Python
1829
uses: actions/setup-python@v5

.github/workflows/report.yml

+13-3
Original file line numberDiff line numberDiff line change
@@ -2,18 +2,28 @@ name: Generate Report
22

33
on:
44
schedule:
5-
# 1:15am onwards, days 1-20, third month of each quarter
6-
- cron: '15 1,5,9,13,17,21,23 1-20 3,6,9,12 *'
5+
# at 01:15 UTC on days 1-20 of the third month of each quarter
6+
- cron: '15 1 1-20 3,6,9,12 *'
77
workflow_dispatch:
88

9-
109
jobs:
1110
generate-report:
1211
runs-on: ubuntu-latest
1312

13+
# CC Technology team members:
14+
# See cc-quantifying-bot GitHub entry in Bitwarden for information on
15+
# the BOT_* secrets (BOT_TOKEN, BOT_NAME, BOT_EMAIL)
16+
1417
steps:
1518
- name: Checkout repository
1619
uses: actions/checkout@v4
20+
with:
21+
token: ${{ secrets.BOT_TOKEN }}
22+
23+
- name: Configure git
24+
run: |
25+
git config user.name "${{ secrets.BOT_NAME }}"
26+
git config user.email "${{ secrets.BOT_EMAIL }}"
1727
1828
- name: Set up Python
1929
uses: actions/setup-python@v5

pre-automation/visualization/visualization_engineering.ipynb

+39-34
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
"\n",
3939
"\n",
4040
"import warnings\n",
41+
"\n",
4142
"warnings.filterwarnings(\"ignore\")"
4243
]
4344
},
@@ -109,7 +110,9 @@
109110
"raw_ia_license_data = pd.read_csv(dataset_paths[\"ia_license\"])\n",
110111
"raw_metmuseum_license_data = pd.read_csv(dataset_paths[\"metmuseum_license\"])\n",
111112
"raw_vimeo_license_data = pd.read_csv(dataset_paths[\"vimeo_license\"])\n",
112-
"raw_wikicommons_license_data = pd.read_csv(dataset_paths[\"wikicommons_license\"], encoding=\"latin-1\")\n",
113+
"raw_wikicommons_license_data = pd.read_csv(\n",
114+
" dataset_paths[\"wikicommons_license\"], encoding=\"latin-1\"\n",
115+
")\n",
113116
"raw_wikipedia_license_data = pd.read_csv(dataset_paths[\"wikipedia_license\"])\n",
114117
"raw_youtube_time_data = pd.read_csv(dataset_paths[\"youtube_time\"])"
115118
]
@@ -2923,36 +2926,36 @@
29232926
"outputs": [],
29242927
"source": [
29252928
"rename_dict = {\n",
2926-
" \"Bolivia\": \"Bolivia (Plurinational State of)\",\n",
2927-
" \"Cape Verde\": \"Cabo Verde\",\n",
2928-
" \"Congo the Democratic Republic of the\": \"Congo, Democratic Republic of the\",\n",
2929-
" \"Cote D'ivoire\": \"Côte d'Ivoire\",\n",
2930-
" \"Croatia (Hrvatska)\": \"Croatia\",\n",
2931-
" \"Czech Republic\": \"Czechia\",\n",
2932-
" \"East Timor\": \"Timor-Leste\",\n",
2933-
" \"Heard Island and Mcdonald Islands\": \"Heard Island and McDonald Islands\",\n",
2934-
" \"Holy See (Vatican City State)\": \"Holy See\",\n",
2935-
" \"Iran Islamic Republic of\": \"Iran (Islamic Republic of)\",\n",
2936-
" \"Korea Democratic People's Republic of\": \"Korea (Democratic People's Republic of)\",\n",
2937-
" \"Korea Republic of\": \"Korea, Republic of\",\n",
2938-
" \"Libyan Arab Jamahiriya\": \"Libya\",\n",
2939-
" \"Macedonia the Former Yugosalv Republic of\": \"North Macedonia\",\n",
2940-
" \"Micronesia Federated States of\": \"Micronesia (Federated States of)\",\n",
2941-
" \"Moldova Republic of\": \"Moldova, Republic of\",\n",
2942-
" \"Netherlands Antilles\": \"Netherlands\",\n",
2943-
" \"Palestinian Territory\": \"Palestine, State of\",\n",
2944-
" \"Reunion\": \"Réunion\",\n",
2945-
" \"Saint Helena\": \"Saint Helena, Ascension and Tristan da Cunha\",\n",
2946-
" \"Swaziland\": \"Eswatini\",\n",
2947-
" \"Taiwan Province of China\": \"Taiwan, Province of China\",\n",
2948-
" \"Tanzania United Republic of\": \"Tanzania, United Republic of\",\n",
2949-
" \"United Kingdom\": \"United Kingdom of Great Britain and Northern Ireland\",\n",
2950-
" \"United States\": \"United States of America\",\n",
2951-
" \"Venezuela\": \"Venezuela (Bolivarian Republic of)\",\n",
2952-
" \"Vietnam\": \"Viet Nam\",\n",
2953-
" \"Virgin Islands British\": \"Virgin Islands (British)\",\n",
2954-
" \"Virgin Islands U.S.\": \"Virgin Islands (U.S.)\",\n",
2955-
" }"
2929+
" \"Bolivia\": \"Bolivia (Plurinational State of)\",\n",
2930+
" \"Cape Verde\": \"Cabo Verde\",\n",
2931+
" \"Congo the Democratic Republic of the\": \"Congo, Democratic Republic of the\",\n",
2932+
" \"Cote D'ivoire\": \"Côte d'Ivoire\",\n",
2933+
" \"Croatia (Hrvatska)\": \"Croatia\",\n",
2934+
" \"Czech Republic\": \"Czechia\",\n",
2935+
" \"East Timor\": \"Timor-Leste\",\n",
2936+
" \"Heard Island and Mcdonald Islands\": \"Heard Island and McDonald Islands\",\n",
2937+
" \"Holy See (Vatican City State)\": \"Holy See\",\n",
2938+
" \"Iran Islamic Republic of\": \"Iran (Islamic Republic of)\",\n",
2939+
" \"Korea Democratic People's Republic of\": \"Korea (Democratic People's Republic of)\",\n",
2940+
" \"Korea Republic of\": \"Korea, Republic of\",\n",
2941+
" \"Libyan Arab Jamahiriya\": \"Libya\",\n",
2942+
" \"Macedonia the Former Yugosalv Republic of\": \"North Macedonia\",\n",
2943+
" \"Micronesia Federated States of\": \"Micronesia (Federated States of)\",\n",
2944+
" \"Moldova Republic of\": \"Moldova, Republic of\",\n",
2945+
" \"Netherlands Antilles\": \"Netherlands\",\n",
2946+
" \"Palestinian Territory\": \"Palestine, State of\",\n",
2947+
" \"Reunion\": \"Réunion\",\n",
2948+
" \"Saint Helena\": \"Saint Helena, Ascension and Tristan da Cunha\",\n",
2949+
" \"Swaziland\": \"Eswatini\",\n",
2950+
" \"Taiwan Province of China\": \"Taiwan, Province of China\",\n",
2951+
" \"Tanzania United Republic of\": \"Tanzania, United Republic of\",\n",
2952+
" \"United Kingdom\": \"United Kingdom of Great Britain and Northern Ireland\",\n",
2953+
" \"United States\": \"United States of America\",\n",
2954+
" \"Venezuela\": \"Venezuela (Bolivarian Republic of)\",\n",
2955+
" \"Vietnam\": \"Viet Nam\",\n",
2956+
" \"Virgin Islands British\": \"Virgin Islands (British)\",\n",
2957+
" \"Virgin Islands U.S.\": \"Virgin Islands (U.S.)\",\n",
2958+
"}"
29562959
]
29572960
},
29582961
{
@@ -3091,9 +3094,11 @@
30913094
"google_country_data = raw_google_country_data_transpose.copy()\n",
30923095
"google_country_data.rename(index=rename_dict, inplace=True)\n",
30933096
"\n",
3094-
"country_codes_data.set_index('name', inplace=True)\n",
3095-
"google_country_data['iso_a3'] = google_country_data.index.map(country_codes_data['alpha-3'])\n",
3096-
"google_geo_data = google_country_data.dropna(subset=['iso_a3'])\n",
3097+
"country_codes_data.set_index(\"name\", inplace=True)\n",
3098+
"google_country_data[\"iso_a3\"] = google_country_data.index.map(\n",
3099+
" country_codes_data[\"alpha-3\"]\n",
3100+
")\n",
3101+
"google_geo_data = google_country_data.dropna(subset=[\"iso_a3\"])\n",
30973102
"\n",
30983103
"google_geo_data"
30993104
]

scripts/1-fetch/deviantart_fetched.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,7 @@ def record_results(license_type, data):
195195
"a",
196196
newline="",
197197
) as f:
198-
writer = csv.writer(f)
198+
writer = csv.writer(f, dialect="unix")
199199
writer.writerow(row)
200200

201201

@@ -263,7 +263,9 @@ def main():
263263
save_state(state)
264264

265265
# Add and commit changes
266-
shared.add_and_commit(PATHS["repo"], "Added and committed DeviantArt data")
266+
shared.add_and_commit(
267+
PATHS["repo"], PATHS["data_quarter"], "Add and commit DeviantArt data"
268+
)
267269

268270
# Push changes
269271
shared.push_changes(PATHS["repo"])

scripts/1-fetch/flickr_fetched.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -351,8 +351,8 @@ def save_license_totals():
351351
license_total_path = os.path.join(
352352
PATHS["data_phase"], "flickr_fetched", "license_total.csv"
353353
)
354-
with open(license_total_path, "w") as csvfile:
355-
writer = csv.writer(csvfile)
354+
with open(license_total_path, "w") as f:
355+
writer = csv.writer(f, dialect="unix")
356356
writer.writerow(["License", "Total"])
357357
for license, total in license_counts.items():
358358
writer.writerow([license, total])
@@ -367,7 +367,9 @@ def main():
367367
LOGGER.info("Script execution completed successfully.")
368368

369369
# Add and commit changes
370-
shared.add_and_commit(PATHS["repo"], "Added and committed new reports")
370+
shared.add_and_commit(
371+
PATHS["repo"], PATHS["data_quarter"], "Add and commit new reports"
372+
)
371373

372374
# Push changes
373375
shared.push_changes(PATHS["repo"])

scripts/1-fetch/gcs_fetched.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -326,7 +326,7 @@ def record_results(results):
326326
with open(
327327
os.path.join(PATHS["data_phase"], "gcs_fetched.csv"), "a", newline=""
328328
) as f:
329-
writer = csv.writer(f)
329+
writer = csv.writer(f, dialect="unix")
330330
for result in results:
331331
writer.writerow(result)
332332

@@ -375,7 +375,9 @@ def main():
375375
save_state(state)
376376

377377
# Add and commit changes
378-
shared.add_and_commit(PATHS["repo"], "Added and committed new reports")
378+
shared.add_and_commit(
379+
PATHS["repo"], PATHS["data_quarter"], "Add and commit new reports"
380+
)
379381

380382
# Push changes
381383
shared.push_changes(PATHS["repo"])

scripts/1-fetch/github_fetched.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ def record_results(license_type, data):
122122
"a",
123123
newline="",
124124
) as f:
125-
writer = csv.writer(f)
125+
writer = csv.writer(f, dialect="unix")
126126
writer.writerow(row)
127127

128128

@@ -187,7 +187,9 @@ def main():
187187
save_state(state)
188188

189189
# Add and commit changes
190-
shared.add_and_commit(PATHS["repo"], "Added and committed GitHub data")
190+
shared.add_and_commit(
191+
PATHS["repo"], PATHS["data_quarter"], "Add and commit GitHub data"
192+
)
191193

192194
# Push changes
193195
shared.push_changes(PATHS["repo"])

scripts/1-fetch/internetarchive_fetched.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ def record_results(license_type, data):
154154
"a",
155155
newline="",
156156
) as f:
157-
writer = csv.writer(f)
157+
writer = csv.writer(f, dialect="unix")
158158
writer.writerow(row)
159159

160160

@@ -223,7 +223,9 @@ def main():
223223

224224
# Add and commit changes
225225
shared.add_and_commit(
226-
PATHS["repo"], "Added and committed Internet Archive data"
226+
PATHS["repo"],
227+
PATHS["data_quarter"],
228+
"Add and commit Internet Archive data",
227229
)
228230

229231
# Push changes

scripts/1-fetch/metmuseum_fetched.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ def record_results(license_type, data):
123123
"a",
124124
newline="",
125125
) as f:
126-
writer = csv.writer(f)
126+
writer = csv.writer(f, dialect="unix")
127127
writer.writerow(row)
128128

129129

@@ -191,7 +191,9 @@ def main():
191191
save_state(state)
192192

193193
# Add and commit changes
194-
shared.add_and_commit(PATHS["repo"], "Added and committed MetMuseum data")
194+
shared.add_and_commit(
195+
PATHS["repo"], PATHS["data_quarter"], "Add and commit MetMuseum data"
196+
)
195197

196198
# Push changes
197199
shared.push_changes(PATHS["repo"])

scripts/1-fetch/vimeo_fetched.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ def record_results(license_type, data):
165165
with open(
166166
os.path.join(PATHS["data_phase"], "vimeo_fetched.csv"), "a", newline=""
167167
) as f:
168-
writer = csv.writer(f)
168+
writer = csv.writer(f, dialect="unix")
169169
writer.writerow(row)
170170

171171

@@ -233,7 +233,9 @@ def main():
233233
save_state(state)
234234

235235
# Add and commit changes
236-
shared.add_and_commit(PATHS["repo"], "Added and committed Vimeo data")
236+
shared.add_and_commit(
237+
PATHS["repo"], PATHS["data_quarter"], "Add and commit Vimeo data"
238+
)
237239

238240
# Push changes
239241
shared.push_changes(PATHS["repo"])

scripts/1-fetch/wikicommons_fetched.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ def record_results(license_type, data):
176176
"a",
177177
newline="",
178178
) as f:
179-
writer = csv.writer(f)
179+
writer = csv.writer(f, dialect="unix")
180180
writer.writerow(row)
181181

182182

@@ -275,7 +275,7 @@ def main():
275275

276276
# Add and commit changes
277277
shared.add_and_commit(
278-
PATHS["repo"], "Added and committed WikiCommons data"
278+
PATHS["repo"], PATHS["data_quarter"], "Add and commit WikiCommons data"
279279
)
280280

281281
# Push changes

scripts/1-fetch/wikipedia_fetched.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ def record_results(stats):
137137
"a",
138138
newline="",
139139
) as f:
140-
writer = csv.writer(f)
140+
writer = csv.writer(f, dialect="unix")
141141
writer.writerow(row)
142142

143143

@@ -218,7 +218,9 @@ def main():
218218
save_state(state)
219219

220220
# Add and commit changes
221-
shared.add_and_commit(PATHS["repo"], "Added and committed Wikipedia data")
221+
shared.add_and_commit(
222+
PATHS["repo"], PATHS["data_quarter"], "Add and commit Wikipedia data"
223+
)
222224

223225
# Push changes
224226
shared.push_changes(PATHS["repo"])

scripts/1-fetch/youtube_fetched.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ def record_results(license_type, time, document_count):
174174
"a",
175175
newline="",
176176
) as f:
177-
writer = csv.writer(f)
177+
writer = csv.writer(f, dialect="unix")
178178
writer.writerow(row)
179179

180180

@@ -256,7 +256,9 @@ def main():
256256
save_state(state)
257257

258258
# Add and commit changes
259-
shared.add_and_commit(PATHS["repo"], "Added and committed YouTube data")
259+
shared.add_and_commit(
260+
PATHS["repo"], PATHS["data_quarter"], "Add and commit YouTube data"
261+
)
260262

261263
# Push changes
262264
shared.push_changes(PATHS["repo"])

0 commit comments

Comments
 (0)