@@ -3,6 +3,7 @@
 """

 # Standard library
+import logging
 import os.path
 import re
 import sys
@@ -24,6 +25,28 @@
 # Set the current working directory
 PATH_WORK_DIR = os.path.dirname(os.path.abspath(__file__))

+# Set the current working directory
+CWD = os.path.dirname(os.path.abspath(__file__))
+
+# Set up the logger
+LOG = logging.getLogger(__name__)
+LOG.setLevel(logging.INFO)
+
+# Define both the handler and the formatter
+handler = logging.StreamHandler()
+formatter = logging.Formatter(
+    "%(asctime)s - %(levelname)s - %(name)s - %(message)s"
+)
+
+# Add formatter to the handler
+handler.setFormatter(formatter)
+
+# Add handler to the logger
+LOG.addHandler(handler)
+
+# Log the start of the script execution
+LOG.info("Script execution started.")
+

 def tags_frequency(csv_path, column_names):
     """
@@ -36,6 +59,8 @@ def tags_frequency(csv_path, column_names):
         Example: ["tags", "description"]

     """
+    LOG.info("Generating word cloud based on tags.")
+
     df = pd.read_csv(csv_path)
     # Process each column containing tags
     for column_name in column_names:
@@ -54,7 +79,7 @@ def tags_frequency(csv_path, column_names):
                 and str(row) != ""
                 and str(row) != "nan"
             ):
-                print(str(row))
+                LOG.debug(f"Processing row: {row}")
                 if "ChineseinUS.org" in str(row):
                     row = "ChineseinUS"
                 list2 += re.split(r"\s|(?<!\d)[,.](?!\d)", str(row))
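A note on the regex in that last context line: re.split breaks the row on whitespace, or on a comma/period that has no digit immediately before and none immediately after, so decimal numbers pass through intact. A quick check with a made-up sample string:

import re

print(re.split(r"\s|(?<!\d)[,.](?!\d)", "art,design 3.5 stars"))
# ['art', 'design', '3.5', 'stars']  -- the '.' in 3.5 is not a split point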
@@ -143,6 +168,8 @@ def time_trend_helper(df):
     Returns:
     - DataFrame: DataFrame with counts of entries per year.
     """
+    LOG.info("Extracting year-wise count of entries.")
+
     year_list = []
     for date_row in df["dates"][0:]:
         date_list = str(date_row).split()
@@ -169,6 +196,8 @@ def time_trend(csv_path):
     Args:
     - csv_path (str): Path to the CSV file.
     """
+    LOG.info("Generating time trend line graph.")
+
     df = pd.read_csv(csv_path)
     count_df = time_trend_helper(df)

@@ -210,6 +239,8 @@ def time_trend_compile_helper(yearly_count):
     Returns:
     - DataFrame: Filtered yearly count data.
     """
+    LOG.info("Filtering yearly trend data.")
+
     Years = np.arange(2018, 2023)
     yearly_count["year"] = list(yearly_count.index)
     counts = []
@@ -218,7 +249,7 @@ def time_trend_compile_helper(yearly_count):
             int(yearly_count["year"][num]) >= 2018
         ):
             counts.append(yearly_count["Counts"][num])
-    print(counts)
+    LOG.info(f"{counts}")
     final_yearly_count = pd.DataFrame(
         list(zip(Years, counts)), columns=["Years", "Yearly_counts"]
     )
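The DataFrame construction in the context lines pairs the fixed 2018-2022 range with the collected counts; a minimal sketch of the same pattern, with hypothetical counts:

import numpy as np
import pandas as pd

Years = np.arange(2018, 2023)
counts = [4, 8, 15, 16, 23]  # hypothetical yearly counts
final = pd.DataFrame(list(zip(Years, counts)), columns=["Years", "Yearly_counts"])
# Note: zip() stops at the shorter input, so if fewer than five years
# pass the >= 2018 filter, the frame silently ends up with fewer rows.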
@@ -229,6 +260,8 @@ def time_trend_compile():
     """
     Compile yearly trends for different licenses and plot them.
     """
+    LOG.info("Compiling yearly trends for different licenses.")
+
     license1 = pd.read_csv("../flickr/dataset/cleaned_license1.csv")
     license2 = pd.read_csv("../flickr/dataset/cleaned_license2.csv")
     license3 = pd.read_csv("../flickr/dataset/cleaned_license3.csv")
@@ -286,7 +319,7 @@ def time_trend_compile():
     yearly_count6 = time_trend_compile_helper(yearly_count6)
     yearly_count9 = time_trend_compile_helper(yearly_count9)
     yearly_count10 = time_trend_compile_helper(yearly_count10)
-    print(yearly_count1)
+    LOG.info(f"{yearly_count1}")

     # Plot yearly trend for all licenses
     plt.plot(
@@ -375,17 +408,21 @@ def view_compare_helper(df):
     Returns:
     - int: Maximum views.
     """
+    LOG.info("Calculating maximum views of pictures under a license.")
+
     highest_view = int(max(df["views"]))
     df = df.sort_values("views", ascending=False)
+    LOG.info(f"DataFrame sorted by views in descending order: {df}")
+    LOG.info(f"Maximum views found: {highest_view}")
     return highest_view
-    print(df)
-    print(highest_view)


 def view_compare():
     """
     Compare maximum views of pictures under different licenses.
     """
+    LOG.info("Comparing maximum views of pictures under different licenses.")
+
     license1 = pd.read_csv(
         os.path.join(PATH_WORK_DIR, "../flickr/dataset/cleaned_license1.csv")
     )
@@ -424,7 +461,7 @@ def view_compare():
     maxs = []
     for lic in licenses:
         maxs.append(view_compare_helper(lic))
-    print(maxs)
+    LOG.info(f"{maxs}")
     # Create DataFrame to store license and their maximum views
     temp_data = pd.DataFrame()
     temp_data["Licenses"] = [
@@ -480,6 +517,8 @@ def total_usage():
     """
     Generate a bar plot showing the total usage of different licenses.
     """
+    LOG.info("Generating bar plot showing total usage of different licenses.")
+
     # Reads the license total file as the input dataset
     df = pd.read_csv(
         os.path.join(PATH_WORK_DIR, "../flickr/dataset/license_total.csv")
@@ -499,15 +538,15 @@ def main():


 if __name__ == "__main__":
-    # Exception handling
+    # Exception Handling
     try:
         main()
     except SystemExit as e:
+        LOG.error(f"System exit with code: {e.code}")
         sys.exit(e.code)
     except KeyboardInterrupt:
-        print("INFO (130) Halted via KeyboardInterrupt.", file=sys.stderr)
+        LOG.info("(130) Halted via KeyboardInterrupt.")
         sys.exit(130)
     except Exception:
-        print("ERROR (1) Unhandled exception:", file=sys.stderr)
-        print(traceback.print_exc(), file=sys.stderr)
-        sys.exit(1)
+        LOG.error(f"(1) Unhandled exception: {traceback.format_exc()}")
+        sys.exit(1)
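For reference, the stdlib can attach the traceback itself instead of interpolating traceback.format_exc() into the message; a minimal sketch of that alternative (not what this PR does):

except Exception:
    # LOG.exception logs at ERROR level and appends the current
    # traceback to the record automatically.
    LOG.exception("(1) Unhandled exception:")
    sys.exit(1)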