Skip to content

Commit 9edade9

Browse files
authored
Merge pull request #97 from naishasinha/main
Integrate Python logging module into all .py files of Quantifying + README Update for Logging
2 parents 53e5b4e + 2d58fb2 commit 9edade9

13 files changed

+556
-99
lines changed

README.md

+2
Original file line numberDiff line numberDiff line change
@@ -133,13 +133,15 @@ directories to check:
133133
- [pypa/pipenv][pipenv]: _Python Development Workflow for Humans._
134134
- [pre-commit][pre-commit]: _A framework for managing and maintaining
135135
multi-language pre-commit hooks._
136+
- [Logging][logging]: _Built-in Python logging module to implement a flexible logging system across shared modules._
136137

137138
[ccospyguide]: https://opensource.creativecommons.org/contributing-code/python-guidelines/
138139
[black]: https://github.com/psf/black
139140
[flake8]: https://github.com/PyCQA/flake8
140141
[isort]: https://pycqa.github.io/isort/
141142
[pipenv]: https://github.com/pypa/pipenv
142143
[pre-commit]: https://pre-commit.com/
144+
[logging]: https://docs.python.org/3/howto/logging.html
143145

144146

145147
### GitHub Actions

analyze/data_analysis.py

+50-11
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
"""
44

55
# Standard library
6+
import logging
67
import os.path
78
import re
89
import sys
@@ -24,6 +25,28 @@
2425
# Set the current working directory
2526
PATH_WORK_DIR = os.path.dirname(os.path.abspath(__file__))
2627

28+
# Set the current working directory
29+
CWD = os.path.dirname(os.path.abspath(__file__))
30+
31+
# Set up the logger
32+
LOG = logging.getLogger(__name__)
33+
LOG.setLevel(logging.INFO)
34+
35+
# Define both the handler and the formatter
36+
handler = logging.StreamHandler()
37+
formatter = logging.Formatter(
38+
"%(asctime)s - %(levelname)s - %(name)s - %(message)s"
39+
)
40+
41+
# Add formatter to the handler
42+
handler.setFormatter(formatter)
43+
44+
# Add handler to the logger
45+
LOG.addHandler(handler)
46+
47+
# Log the start of the script execution
48+
LOG.info("Script execution started.")
49+
2750

2851
def tags_frequency(csv_path, column_names):
2952
"""
@@ -36,6 +59,8 @@ def tags_frequency(csv_path, column_names):
3659
Example: ["tags", "description"]
3760
3861
"""
62+
LOG.info("Generating word cloud based on tags.")
63+
3964
df = pd.read_csv(csv_path)
4065
# Process each column containing tags
4166
for column_name in column_names:
@@ -54,7 +79,7 @@ def tags_frequency(csv_path, column_names):
5479
and str(row) != ""
5580
and str(row) != "nan"
5681
):
57-
print(str(row))
82+
LOG.debug(f"Processing row: {row}")
5883
if "ChineseinUS.org" in str(row):
5984
row = "ChineseinUS"
6085
list2 += re.split(r"\s|(?<!\d)[,.](?!\d)", str(row))
@@ -143,6 +168,8 @@ def time_trend_helper(df):
143168
Returns:
144169
- DataFrame: DataFrame with counts of entries per year.
145170
"""
171+
LOG.info("Extracting year-wise count of entries.")
172+
146173
year_list = []
147174
for date_row in df["dates"][0:]:
148175
date_list = str(date_row).split()
@@ -169,6 +196,8 @@ def time_trend(csv_path):
169196
Args:
170197
- csv_path (str): Path to the CSV file.
171198
"""
199+
LOG.info("Generating time trend line graph.")
200+
172201
df = pd.read_csv(csv_path)
173202
count_df = time_trend_helper(df)
174203

@@ -210,6 +239,8 @@ def time_trend_compile_helper(yearly_count):
210239
Returns:
211240
- DataFrame: Filtered yearly count data.
212241
"""
242+
LOG.info("Filtering yearly trend data.")
243+
213244
Years = np.arange(2018, 2023)
214245
yearly_count["year"] = list(yearly_count.index)
215246
counts = []
@@ -218,7 +249,7 @@ def time_trend_compile_helper(yearly_count):
218249
int(yearly_count["year"][num]) >= 2018
219250
):
220251
counts.append(yearly_count["Counts"][num])
221-
print(counts)
252+
LOG.info(f"{counts}")
222253
final_yearly_count = pd.DataFrame(
223254
list(zip(Years, counts)), columns=["Years", "Yearly_counts"]
224255
)
@@ -229,6 +260,8 @@ def time_trend_compile():
229260
"""
230261
Compile yearly trends for different licenses and plot them.
231262
"""
263+
LOG.info("Compiling yearly trends for different licenses.")
264+
232265
license1 = pd.read_csv("../flickr/dataset/cleaned_license1.csv")
233266
license2 = pd.read_csv("../flickr/dataset/cleaned_license2.csv")
234267
license3 = pd.read_csv("../flickr/dataset/cleaned_license3.csv")
@@ -286,7 +319,7 @@ def time_trend_compile():
286319
yearly_count6 = time_trend_compile_helper(yearly_count6)
287320
yearly_count9 = time_trend_compile_helper(yearly_count9)
288321
yearly_count10 = time_trend_compile_helper(yearly_count10)
289-
print(yearly_count1)
322+
LOG.info(f"{yearly_count1}")
290323

291324
# Plot yearly trend for all licenses
292325
plt.plot(
@@ -375,17 +408,21 @@ def view_compare_helper(df):
375408
Returns:
376409
- int: Maximum views.
377410
"""
411+
LOG.info("Calculating maximum views of pictures under a license.")
412+
378413
highest_view = int(max(df["views"]))
379414
df = df.sort_values("views", ascending=False)
415+
LOG.info(f"DataFrame sorted by views in descending order: {df}")
416+
LOG.info(f"Maximum views found: {highest_view}")
380417
return highest_view
381-
print(df)
382-
print(highest_view)
383418

384419

385420
def view_compare():
386421
"""
387422
Compare maximum views of pictures under different licenses.
388423
"""
424+
LOG.info("Comparing maximum views of pictures under different licenses.")
425+
389426
license1 = pd.read_csv(
390427
os.path.join(PATH_WORK_DIR, "../flickr/dataset/cleaned_license1.csv")
391428
)
@@ -424,7 +461,7 @@ def view_compare():
424461
maxs = []
425462
for lic in licenses:
426463
maxs.append(view_compare_helper(lic))
427-
print(maxs)
464+
LOG.info(f"{maxs}")
428465
# Create DataFrame to store license and their maximum views
429466
temp_data = pd.DataFrame()
430467
temp_data["Licenses"] = [
@@ -480,6 +517,8 @@ def total_usage():
480517
"""
481518
Generate a bar plot showing the total usage of different licenses.
482519
"""
520+
LOG.info("Generating bar plot showing total usage of different licenses.")
521+
483522
# Reads the license total file as the input dataset
484523
df = pd.read_csv(
485524
os.path.join(PATH_WORK_DIR, "../flickr/dataset/license_total.csv")
@@ -499,15 +538,15 @@ def main():
499538

500539

501540
if __name__ == "__main__":
502-
# Exception handling
541+
# Exception Handling
503542
try:
504543
main()
505544
except SystemExit as e:
545+
LOG.error(f"System exit with code: {e.code}")
506546
sys.exit(e.code)
507547
except KeyboardInterrupt:
508-
print("INFO (130) Halted via KeyboardInterrupt.", file=sys.stderr)
548+
LOG.info("(130) Halted via KeyboardInterrupt.")
509549
sys.exit(130)
510550
except Exception:
511-
print("ERROR (1) Unhandled exception:", file=sys.stderr)
512-
print(traceback.print_exc(), file=sys.stderr)
513-
sys.exit(1)
551+
LOG.error(f"(1) Unhandled exception: {traceback.format_exc()}")
552+
sys.exit(1)

deviantart/deviantart_scratcher.py

+41-7
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
data.
55
"""
66
# Standard library
7+
import logging
78
import os
89
import sys
910
import traceback
@@ -36,6 +37,25 @@
3637
# Retrieve Programmable Search Engine key from environment variables
3738
PSE_KEY = os.getenv("PSE_KEY")
3839

40+
# Set up the logger
41+
LOG = logging.getLogger(__name__)
42+
LOG.setLevel(logging.INFO)
43+
44+
# Define both the handler and the formatter
45+
handler = logging.StreamHandler()
46+
formatter = logging.Formatter(
47+
"%(asctime)s - %(levelname)s - %(name)s - %(message)s"
48+
)
49+
50+
# Add formatter to the handler
51+
handler.setFormatter(formatter)
52+
53+
# Add handler to the logger
54+
LOG.addHandler(handler)
55+
56+
# Log the start of the script execution
57+
LOG.info("Script execution started.")
58+
3959

4060
def get_license_list():
4161
"""
@@ -45,6 +65,8 @@ def get_license_list():
4565
- np.array: An array containing all license types that should be
4666
searched via Programmable Search Engine.
4767
"""
68+
LOG.info("Retrieving list of licenses from Creative Commons' records.")
69+
4870
# Read license data from file
4971
cc_license_data = pd.read_csv(
5072
os.path.join(PATH_WORK_DIR, "legal-tool-paths.txt"), header=None
@@ -70,6 +92,8 @@ def get_request_url(license):
7092
Returns:
7193
- str: The API Endpoint URL for the query specified by parameters.
7294
"""
95+
LOG.info(f"Generating API Endpoint URL for specified license: {license}")
96+
7397
try:
7498
api_key = API_KEYS[API_KEYS_IND]
7599
return (
@@ -80,7 +104,7 @@ def get_request_url(license):
80104
)
81105
except Exception as e:
82106
if isinstance(e, IndexError):
83-
print("Depleted all API Keys provided", file=sys.stderr)
107+
LOG.exception("Depleted all API Keys provided")
84108
else:
85109
raise e
86110

@@ -97,6 +121,8 @@ def get_response_elems(license):
97121
- dict: A dictionary mapping metadata to its value provided from the API
98122
query.
99123
"""
124+
LOG.info("Making a request to the API and handling potential retries.")
125+
100126
try:
101127
# Make a request to the API and handle potential retries
102128
request_url = get_request_url(license)
@@ -120,16 +146,16 @@ def get_response_elems(license):
120146
# If quota limit exceeded, switch to the next API key
121147
global API_KEYS_IND
122148
API_KEYS_IND += 1
123-
print(
124-
"Changing API KEYS due to depletion of quota", file=sys.stderr
125-
)
149+
LOG.exception("Changing API KEYS due to depletion of quota")
126150
return get_response_elems(license)
127151
else:
128152
raise e
129153

130154

131155
def set_up_data_file():
132156
"""Writes the header row to the file to contain DeviantArt data."""
157+
LOG.info("Setting up data file by writing the header row.")
158+
133159
header_title = "LICENSE TYPE,Document Count"
134160
with open(DATA_WRITE_FILE, "w") as f:
135161
f.write(f"{header_title}\n")
@@ -142,6 +168,11 @@ def record_license_data(license_type):
142168
It's a segment of the URL towards the license description. If not provided,
143169
it defaults to None, indicating no assumption about the license type.
144170
"""
171+
LOG.info(
172+
"Writing the row for license type %s to contain DeviantArt data",
173+
license_type,
174+
)
175+
145176
data_log = (
146177
f"{license_type},"
147178
f"{get_response_elems(license_type)['totalResults']}"
@@ -156,6 +187,8 @@ def record_all_licenses():
156187
list and writes this data into the DATA_WRITE_FILE, as specified by the
157188
constant.
158189
"""
190+
LOG.info("Recording data for all available license types.")
191+
159192
# Get the list of license types
160193
license_list = get_license_list()
161194
# Record data for each license types
@@ -169,14 +202,15 @@ def main():
169202

170203

171204
if __name__ == "__main__":
205+
# Exception Handling
172206
try:
173207
main()
174208
except SystemExit as e:
209+
LOG.error(f"System exit with code: {e.code}")
175210
sys.exit(e.code)
176211
except KeyboardInterrupt:
177-
print("INFO (130) Halted via KeyboardInterrupt.", file=sys.stderr)
212+
LOG.info("(130) Halted via KeyboardInterrupt.")
178213
sys.exit(130)
179214
except Exception:
180-
print("ERROR (1) Unhandled exception:", file=sys.stderr)
181-
print(traceback.print_exc(), file=sys.stderr)
215+
LOG.error(f"(1) Unhandled exception: {traceback.format_exc()}")
182216
sys.exit(1)

0 commit comments

Comments
 (0)