29
29
# Absolute path of the directory containing this script (derived from __file__)
30
30
CWD = os .path .dirname (os .path .abspath (__file__ ))
31
31
32
- # Set up the logger
33
- LOG = logging .getLogger (__name__ )
34
- LOG .setLevel (logging .INFO )
35
-
36
- # Define both the handler and the formatter
37
- handler = logging .StreamHandler ()
38
- formatter = logging .Formatter (
39
- "%(asctime)s - %(levelname)s - %(name)s - %(message)s"
40
- )
41
-
42
- # Add formatter to the handler
43
- handler .setFormatter (formatter )
44
-
45
- # Add handler to the logger
46
- LOG .addHandler (handler )
47
-
48
- # Log the start of the script execution
49
- LOG .info ("Script execution started." )
50
-
51
32
52
33
def tags_frequency (csv_path , column_names ):
53
34
"""
@@ -60,7 +41,7 @@ def tags_frequency(csv_path, column_names):
60
41
Example: ["tags", "description"]
61
42
62
43
"""
63
- LOG .info ("Generating word cloud based on tags." )
44
+ LOGGER .info ("Generating word cloud based on tags." )
64
45
65
46
df = pd .read_csv (csv_path )
66
47
# Process each column containing tags
@@ -80,7 +61,7 @@ def tags_frequency(csv_path, column_names):
80
61
and str (row ) != ""
81
62
and str (row ) != "nan"
82
63
):
83
- LOG .debug (f"Processing row: { row } " )
64
+ LOGGER .debug (f"Processing row: { row } " )
84
65
if "ChineseinUS.org" in str (row ):
85
66
row = "ChineseinUS"
86
67
list2 += re .split (r"\s|(?<!\d)[,.](?!\d)" , str (row ))
@@ -169,7 +150,7 @@ def time_trend_helper(df):
169
150
Returns:
170
151
- DataFrame: DataFrame with counts of entries per year.
171
152
"""
172
- LOG .info ("Extracting year-wise count of entries." )
153
+ LOGGER .info ("Extracting year-wise count of entries." )
173
154
174
155
year_list = []
175
156
for date_row in df ["dates" ][0 :]:
@@ -197,7 +178,7 @@ def time_trend(csv_path):
197
178
Args:
198
179
- csv_path (str): Path to the CSV file.
199
180
"""
200
- LOG .info ("Generating time trend line graph." )
181
+ LOGGER .info ("Generating time trend line graph." )
201
182
202
183
df = pd .read_csv (csv_path )
203
184
count_df = time_trend_helper (df )
@@ -240,7 +221,7 @@ def time_trend_compile_helper(yearly_count):
240
221
Returns:
241
222
- DataFrame: Filtered yearly count data.
242
223
"""
243
- LOG .info ("Filtering yearly trend data." )
224
+ LOGGER .info ("Filtering yearly trend data." )
244
225
245
226
Years = np .arange (2018 , 2023 )
246
227
yearly_count ["year" ] = list (yearly_count .index )
@@ -250,7 +231,7 @@ def time_trend_compile_helper(yearly_count):
250
231
int (yearly_count ["year" ][num ]) >= 2018
251
232
):
252
233
counts .append (yearly_count ["Counts" ][num ])
253
- LOG .info (f"{ counts } " )
234
+ LOGGER .info (f"{ counts } " )
254
235
final_yearly_count = pd .DataFrame (
255
236
list (zip (Years , counts )), columns = ["Years" , "Yearly_counts" ]
256
237
)
@@ -261,7 +242,7 @@ def time_trend_compile():
261
242
"""
262
243
Compile yearly trends for different licenses and plot them.
263
244
"""
264
- LOG .info ("Compiling yearly trends for different licenses." )
245
+ LOGGER .info ("Compiling yearly trends for different licenses." )
265
246
266
247
license1 = pd .read_csv ("../flickr/dataset/cleaned_license1.csv" )
267
248
license2 = pd .read_csv ("../flickr/dataset/cleaned_license2.csv" )
@@ -320,7 +301,7 @@ def time_trend_compile():
320
301
yearly_count6 = time_trend_compile_helper (yearly_count6 )
321
302
yearly_count9 = time_trend_compile_helper (yearly_count9 )
322
303
yearly_count10 = time_trend_compile_helper (yearly_count10 )
323
- LOG .info (f"{ yearly_count1 } " )
304
+ LOGGER .info (f"{ yearly_count1 } " )
324
305
325
306
# Plot yearly trend for all licenses
326
307
plt .plot (
@@ -409,20 +390,22 @@ def view_compare_helper(df):
409
390
Returns:
410
391
- int: Maximum views.
411
392
"""
412
- LOG .info ("Calculating maximum views of pictures under a license." )
393
+ LOGGER .info ("Calculating maximum views of pictures under a license." )
413
394
414
395
highest_view = int (max (df ["views" ]))
415
396
df = df .sort_values ("views" , ascending = False )
416
- LOG .info (f"DataFrame sorted by views in descending order: { df } " )
417
- LOG .info (f"Maximum views found: { highest_view } " )
397
+ LOGGER .info (f"DataFrame sorted by views in descending order: { df } " )
398
+ LOGGER .info (f"Maximum views found: { highest_view } " )
418
399
return highest_view
419
400
420
401
421
402
def view_compare ():
422
403
"""
423
404
Compare maximum views of pictures under different licenses.
424
405
"""
425
- LOG .info ("Comparing maximum views of pictures under different licenses." )
406
+ LOGGER .info (
407
+ "Comparing maximum views of pictures under different licenses."
408
+ )
426
409
427
410
license1 = pd .read_csv (
428
411
os .path .join (PATH_WORK_DIR , "../flickr/dataset/cleaned_license1.csv" )
@@ -462,7 +445,7 @@ def view_compare():
462
445
maxs = []
463
446
for lic in licenses :
464
447
maxs .append (view_compare_helper (lic ))
465
- LOG .info (f"{ maxs } " )
448
+ LOGGER .info (f"{ maxs } " )
466
449
# Create DataFrame to store license and their maximum views
467
450
temp_data = pd .DataFrame ()
468
451
temp_data ["Licenses" ] = [
@@ -518,7 +501,9 @@ def total_usage():
518
501
"""
519
502
Generate a bar plot showing the total usage of different licenses.
520
503
"""
521
- LOG .info ("Generating bar plot showing total usage of different licenses." )
504
+ LOGGER .info (
505
+ "Generating bar plot showing total usage of different licenses."
506
+ )
522
507
523
508
# Reads the license total file as the input dataset
524
509
df = pd .read_csv (
0 commit comments