 """

 # Standard library
-import logging
-import os.path
+import os
 import re
 import sys
 import traceback
 import pandas as pd
 import plotly.express as px
 import seaborn as sns
-
-warnings.filterwarnings("ignore")
-
-# Third-party
 from wordcloud import STOPWORDS, WordCloud  # noqa: E402

-# Set the current working directory
-PATH_WORK_DIR = os.path.dirname(os.path.abspath(__file__))
-
-# Set the current working directory
-CWD = os.path.dirname(os.path.abspath(__file__))
-
-# Set up the logger
-LOG = logging.getLogger(__name__)
-LOG.setLevel(logging.INFO)
+sys.path.append(".")
+# First-party/Local
+import quantify  # noqa: E402

-# Define both the handler and the formatter
-handler = logging.StreamHandler()
-formatter = logging.Formatter(
-    "%(asctime)s - %(levelname)s - %(name)s - %(message)s"
-)
-
-# Add formatter to the handler
-handler.setFormatter(formatter)
-
-# Add handler to the logger
-LOG.addHandler(handler)
+# Warning suppression /!\ Caution /!\
+warnings.filterwarnings("ignore")

-# Log the start of the script execution
-LOG.info("Script execution started.")
+# Setup PATH_WORK_DIR and LOGGER using quantify.setup()
+_, PATH_WORK_DIR, _, _, LOGGER = quantify.setup(__file__)


 def tags_frequency(csv_path, column_names):
@@ -59,7 +39,7 @@ def tags_frequency(csv_path, column_names):
         Example: ["tags", "description"]

     """
-    LOG.info("Generating word cloud based on tags.")
+    LOGGER.info("Generating word cloud based on tags.")

     df = pd.read_csv(csv_path)
     # Process each column containing tags
@@ -79,7 +59,7 @@ def tags_frequency(csv_path, column_names):
                 and str(row) != ""
                 and str(row) != "nan"
             ):
-                LOG.debug(f"Processing row: {row}")
+                LOGGER.debug(f"Processing row: {row}")
                 if "ChineseinUS.org" in str(row):
                     row = "ChineseinUS"
                 list2 += re.split(r"\s|(?<!\d)[,.](?!\d)", str(row))
@@ -168,7 +148,7 @@ def time_trend_helper(df):
     Returns:
     - DataFrame: DataFrame with counts of entries per year.
     """
-    LOG.info("Extracting year-wise count of entries.")
+    LOGGER.info("Extracting year-wise count of entries.")

     year_list = []
     for date_row in df["dates"][0:]:
@@ -196,7 +176,7 @@ def time_trend(csv_path):
     Args:
     - csv_path (str): Path to the CSV file.
     """
-    LOG.info("Generating time trend line graph.")
+    LOGGER.info("Generating time trend line graph.")

     df = pd.read_csv(csv_path)
     count_df = time_trend_helper(df)
@@ -239,7 +219,7 @@ def time_trend_compile_helper(yearly_count):
     Returns:
     - DataFrame: Filtered yearly count data.
     """
-    LOG.info("Filtering yearly trend data.")
+    LOGGER.info("Filtering yearly trend data.")

     Years = np.arange(2018, 2023)
     yearly_count["year"] = list(yearly_count.index)
@@ -249,7 +229,7 @@ def time_trend_compile_helper(yearly_count):
             int(yearly_count["year"][num]) >= 2018
         ):
             counts.append(yearly_count["Counts"][num])
-    LOG.info(f"{counts}")
+    LOGGER.info(f"{counts}")
     final_yearly_count = pd.DataFrame(
         list(zip(Years, counts)), columns=["Years", "Yearly_counts"]
     )
@@ -260,7 +240,7 @@ def time_trend_compile():
     """
     Compile yearly trends for different licenses and plot them.
     """
-    LOG.info("Compiling yearly trends for different licenses.")
+    LOGGER.info("Compiling yearly trends for different licenses.")

     license1 = pd.read_csv("../flickr/dataset/cleaned_license1.csv")
     license2 = pd.read_csv("../flickr/dataset/cleaned_license2.csv")
@@ -319,7 +299,7 @@ def time_trend_compile():
     yearly_count6 = time_trend_compile_helper(yearly_count6)
     yearly_count9 = time_trend_compile_helper(yearly_count9)
     yearly_count10 = time_trend_compile_helper(yearly_count10)
-    LOG.info(f"{yearly_count1}")
+    LOGGER.info(f"{yearly_count1}")

     # Plot yearly trend for all licenses
     plt.plot(
@@ -408,20 +388,22 @@ def view_compare_helper(df):
     Returns:
     - int: Maximum views.
     """
-    LOG.info("Calculating maximum views of pictures under a license.")
+    LOGGER.info("Calculating maximum views of pictures under a license.")

     highest_view = int(max(df["views"]))
     df = df.sort_values("views", ascending=False)
-    LOG.info(f"DataFrame sorted by views in descending order: {df}")
-    LOG.info(f"Maximum views found: {highest_view}")
+    LOGGER.info(f"DataFrame sorted by views in descending order: {df}")
+    LOGGER.info(f"Maximum views found: {highest_view}")
     return highest_view


 def view_compare():
     """
     Compare maximum views of pictures under different licenses.
     """
-    LOG.info("Comparing maximum views of pictures under different licenses.")
+    LOGGER.info(
+        "Comparing maximum views of pictures under different licenses."
+    )

     license1 = pd.read_csv(
         os.path.join(PATH_WORK_DIR, "../flickr/dataset/cleaned_license1.csv")
@@ -461,7 +443,7 @@ def view_compare():
     maxs = []
     for lic in licenses:
         maxs.append(view_compare_helper(lic))
-    LOG.info(f"{maxs}")
+    LOGGER.info(f"{maxs}")
     # Create DataFrame to store license and their maximum views
     temp_data = pd.DataFrame()
     temp_data["Licenses"] = [
@@ -517,7 +499,9 @@ def total_usage():
     """
     Generate a bar plot showing the total usage of different licenses.
     """
-    LOG.info("Generating bar plot showing total usage of different licenses.")
+    LOGGER.info(
+        "Generating bar plot showing total usage of different licenses."
+    )

     # Reads the license total file as the input dataset
     df = pd.read_csv(
@@ -538,15 +522,14 @@ def main():


 if __name__ == "__main__":
-    # Exception Handling
     try:
         main()
     except SystemExit as e:
-        LOG.error(f"System exit with code: {e.code}")
+        LOGGER.error(f"System exit with code: {e.code}")
         sys.exit(e.code)
     except KeyboardInterrupt:
-        LOG.info("(130) Halted via KeyboardInterrupt.")
+        LOGGER.info("(130) Halted via KeyboardInterrupt.")
         sys.exit(130)
     except Exception:
-        LOG.error(f"(1) Unhandled exception: {traceback.format_exc()}")
+        LOGGER.exception(f"(1) Unhandled exception: {traceback.format_exc()}")
         sys.exit(1)
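
Note on the new quantify.setup() call: this diff only confirms that the helper takes the caller's __file__ and returns a 5-tuple whose second element is the script's working directory and whose fifth is a configured logger (the other three members are discarded at this call site). Below is a minimal sketch of a compatible helper that centralizes the directory and logger boilerplate deleted above; the names of the placeholder tuple members are assumptions, not taken from this diff.

# quantify.py -- hypothetical sketch, not the project's actual module.
# Only the tuple positions of PATH_WORK_DIR (2nd) and LOGGER (5th) are
# confirmed by the call site in this diff; the rest are placeholders.
import datetime
import logging
import os


def setup(caller_file):
    """Return shared paths and a configured logger for a calling script."""
    # Same directory logic as the removed PATH_WORK_DIR boilerplate
    path_work_dir = os.path.dirname(os.path.abspath(caller_file))

    logger = logging.getLogger(os.path.basename(caller_file))
    logger.setLevel(logging.INFO)
    if not logger.handlers:  # avoid stacking handlers on repeated setup()
        handler = logging.StreamHandler()
        # Same format string as the handler/formatter removed in this diff
        handler.setFormatter(
            logging.Formatter(
                "%(asctime)s - %(levelname)s - %(name)s - %(message)s"
            )
        )
        logger.addHandler(handler)

    # Placeholder members (names are assumptions): repo root, a data
    # directory, and a run timestamp, all ignored by the call site above.
    path_repo_root = os.path.dirname(path_work_dir)
    path_data_dir = os.path.join(path_repo_root, "data")
    datetime_today = datetime.datetime.today()

    return (
        path_repo_root, path_work_dir, path_data_dir, datetime_today, logger
    )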