@@ -260,11 +260,11 @@ def process_totals_by_restrictions(args, count_data):
260
260
data_to_csv (args , data , file_path )
261
261
262
262
263
- def process_totals_by_langauage (args , data ):
263
+ def process_totals_by_language (args , data ):
264
264
"""
265
265
Processing language data: totals by language
266
266
"""
267
- LOGGER .info (process_totals_by_langauage .__doc__ .strip ())
267
+ LOGGER .info (process_totals_by_language .__doc__ .strip ())
268
268
data = data .groupby (["LANGUAGE" ], as_index = False )["COUNT" ].sum ()
269
269
data = data .sort_values ("COUNT" , ascending = False )
270
270
data .reset_index (drop = True , inplace = True )
@@ -276,7 +276,7 @@ def process_totals_by_langauage(args, data):
276
276
inplace = True ,
277
277
)
278
278
file_path = shared .path_join (
279
- PATHS ["data_phase" ], "gcs_totals_by_langauage.csv"
279
+ PATHS ["data_phase" ], "gcs_totals_by_language.csv"
280
280
)
281
281
data_to_csv (args , data , file_path )
282
282
@@ -302,28 +302,6 @@ def process_totals_by_country(args, data):
302
302
data_to_csv (args , data , file_path )
303
303
304
304
305
- # Data is already limited to licenses 4.0, CC0, and PDM
306
- #
307
- # def process_license_40_totals_by_langauage(args, data):
308
- # LOGGER.info("Processing language data: top 25 languages")
309
- # data = data[data["TOOL_IDENTIFIER"].str.contains("CC BY")]
310
- # data = data[data["TOOL_IDENTIFIER"].str.contains("4.0")]
311
- # data = data.groupby(["LANGUAGE"], as_index=False)['COUNT'].sum()
312
- # data = data.sort_values("COUNT", ascending=False)
313
- # data.reset_index(drop=True, inplace=True)
314
- # data.rename(
315
- # columns={
316
- # "LANGUAGE": "Language",
317
- # "COUNT": "Count",
318
- # },
319
- # inplace=True,
320
- # )
321
- # file_path = shared.path_join(
322
- # PATHS["data_phase"], "gcs_license_40_totals_by_langauage.csv"
323
- # )
324
- # data_to_csv(args, data, file_path)
325
-
326
-
327
305
# def load_quarter_data(quarter):
328
306
# """
329
307
# Load data for a specific quarter.
@@ -348,7 +326,7 @@ def process_totals_by_country(args, data):
348
326
349
327
# # Process the data to compare by country
350
328
# compare_by_country(current_data, previous_data,
351
- # current_quarter, previous_quarter)
329
+ # current_quarter, previous_quarter)
352
330
353
331
# # Process the data to compare by license
354
332
# compare_by_license(current_data, previous_data,
@@ -360,7 +338,7 @@ def process_totals_by_country(args, data):
360
338
361
339
362
340
# def compare_by_country(current_data, previous_data,
363
- # current_quarter, previous_quarter):
341
+ # current_quarter, previous_quarter):
364
342
# """
365
343
# Compare the number of webpages licensed by country between two quarters.
366
344
# """
@@ -460,8 +438,7 @@ def main():
460
438
language_data = pd .read_csv (
461
439
FILE2_LANGUAGE , usecols = ["TOOL_IDENTIFIER" , "LANGUAGE" , "COUNT" ]
462
440
)
463
- process_totals_by_langauage (args , language_data )
464
- # process_license_40_totals_by_langauage(args, language_data)
441
+ process_totals_by_language (args , language_data )
465
442
466
443
# Country data
467
444
country_data = pd .read_csv (
0 commit comments