Skip to content

Commit f73d86a

Browse files
committed
Add shared LOGGER setup to all files and clean up code
1 parent 8c4b9b5 commit f73d86a

File tree

12 files changed

+238
-173
lines changed

12 files changed

+238
-173
lines changed

analyze/data_analysis.py

+31-26
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,9 @@
33
"""
44

55
# Standard library
6-
import os.path
6+
import os
77
import re
88
import sys
9-
import traceback
109
import warnings
1110

1211
# Third-party
@@ -15,14 +14,17 @@
1514
import pandas as pd
1615
import plotly.express as px
1716
import seaborn as sns
17+
from wordcloud import STOPWORDS, WordCloud # noqa: E402
1818

19-
warnings.filterwarnings("ignore")
19+
# First-party/Local
20+
import quantify
2021

21-
# Third-party
22-
from wordcloud import STOPWORDS, WordCloud # noqa: E402
22+
# Suppress all warnings (caution: this hides every warning)
23+
warnings.filterwarnings("ignore")
2324

24-
# Set the current working directory
25-
CWD = os.path.dirname(os.path.abspath(__file__))
25+
# Set up PATH_WORK_DIR and LOGGER using quantify.setup()
26+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
27+
_, PATH_WORK_DIR, _, _, LOGGER = quantify.setup(__file__)
2628

2729

2830
def tags_frequency(csv_path, column_names):
@@ -126,7 +128,7 @@ def tags_frequency(csv_path, column_names):
126128
fontweight="bold",
127129
)
128130
plt.savefig(
129-
os.path.join(CWD, "wordCloud_plots/license1_wordCloud.png"),
131+
os.path.join(PATH_WORK_DIR, "wordCloud_plots/license1_wordCloud.png"),
130132
dpi=300,
131133
bbox_inches="tight",
132134
)
@@ -193,7 +195,7 @@ def time_trend(csv_path):
193195
plt.xlabel("Day", fontsize=10)
194196
plt.ylabel("Amount", fontsize=10)
195197
plt.savefig(
196-
os.path.join(CWD, "line_graphs/license5_total_trend.png"),
198+
os.path.join(PATH_WORK_DIR, "line_graphs/license5_total_trend.png"),
197199
dpi=300,
198200
bbox_inches="tight",
199201
)
@@ -387,28 +389,28 @@ def view_compare():
387389
Compare maximum views of pictures under different licenses.
388390
"""
389391
license1 = pd.read_csv(
390-
os.path.join(CWD, "../flickr/dataset/cleaned_license1.csv")
392+
os.path.join(PATH_WORK_DIR, "../flickr/dataset/cleaned_license1.csv")
391393
)
392394
license2 = pd.read_csv(
393-
os.path.join(CWD, "../flickr/dataset/cleaned_license2.csv")
395+
os.path.join(PATH_WORK_DIR, "../flickr/dataset/cleaned_license2.csv")
394396
)
395397
license3 = pd.read_csv(
396-
os.path.join(CWD, "../flickr/dataset/cleaned_license3.csv")
398+
os.path.join(PATH_WORK_DIR, "../flickr/dataset/cleaned_license3.csv")
397399
)
398400
license4 = pd.read_csv(
399-
os.path.join(CWD, "../flickr/dataset/cleaned_license4.csv")
401+
os.path.join(PATH_WORK_DIR, "../flickr/dataset/cleaned_license4.csv")
400402
)
401403
license5 = pd.read_csv(
402-
os.path.join(CWD, "../flickr/dataset/cleaned_license5.csv")
404+
os.path.join(PATH_WORK_DIR, "../flickr/dataset/cleaned_license5.csv")
403405
)
404406
license6 = pd.read_csv(
405-
os.path.join(CWD, "../flickr/dataset/cleaned_license6.csv")
407+
os.path.join(PATH_WORK_DIR, "../flickr/dataset/cleaned_license6.csv")
406408
)
407409
license9 = pd.read_csv(
408-
os.path.join(CWD, "../flickr/dataset/cleaned_license9.csv")
410+
os.path.join(PATH_WORK_DIR, "../flickr/dataset/cleaned_license9.csv")
409411
)
410412
license10 = pd.read_csv(
411-
os.path.join(CWD, "../flickr/dataset/cleaned_license10.csv")
413+
os.path.join(PATH_WORK_DIR, "../flickr/dataset/cleaned_license10.csv")
412414
)
413415
licenses = [
414416
license1,
@@ -469,7 +471,7 @@ def view_compare():
469471
current_values = plt.gca().get_yticks()
470472
plt.gca().set_yticklabels(["{:,.0f}".format(x) for x in current_values])
471473
plt.savefig(
472-
os.path.join(CWD, "../analyze/compare_graphs/max_views.png"),
474+
os.path.join(PATH_WORK_DIR, "../analyze/compare_graphs/max_views.png"),
473475
dpi=300,
474476
bbox_inches="tight",
475477
)
@@ -481,29 +483,32 @@ def total_usage():
481483
Generate a bar plot showing the total usage of different licenses.
482484
"""
483485
# Reads the license total file as the input dataset
484-
df = pd.read_csv(os.path.join(CWD, "../flickr/dataset/license_total.csv"))
486+
df = pd.read_csv(
487+
os.path.join(PATH_WORK_DIR, "../flickr/dataset/license_total.csv")
488+
)
485489
df["License"] = [str(x) for x in list(df["License"])]
486490
fig = px.bar(df, x="License", y="Total amount", color="License")
487-
fig.write_html(os.path.join(CWD, "../analyze/total_usage.html"))
491+
fig.write_html(os.path.join(PATH_WORK_DIR, "../analyze/total_usage.html"))
488492
# fig.show()
489493

490494

491495
def main():
492-
tags_frequency(os.path.join(CWD, "merged_all_cleaned.csv"), ["tags"])
496+
tags_frequency(
497+
os.path.join(PATH_WORK_DIR, "merged_all_cleaned.csv"), ["tags"]
498+
)
493499
# df = pd.read_csv("../flickr/dataset/cleaned_license10.csv")
494500
# print(df.shape)
495501

496502

497503
if __name__ == "__main__":
498-
# Exception handling
499504
try:
500505
main()
501506
except SystemExit as e:
507+
LOGGER.error("System exit with code: %d", e.code)
502508
sys.exit(e.code)
503509
except KeyboardInterrupt:
504-
print("INFO (130) Halted via KeyboardInterrupt.", file=sys.stderr)
510+
LOGGER.info("Halted via KeyboardInterrupt.")
505511
sys.exit(130)
506512
except Exception:
507-
print("ERROR (1) Unhandled exception:", file=sys.stderr)
508-
print(traceback.print_exc(), file=sys.stderr)
509-
sys.exit(1)
513+
LOGGER.exception("Unhandled exception:")
514+
sys.exit(1)

deviantart/deviantart_scratcher.py

+15-11
Original file line numberDiff line numberDiff line change
@@ -18,19 +18,15 @@
1818
# First-party/Local
1919
import quantify
2020

21+
# Set up paths, date, and LOGGER using quantify.setup()
2122
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
22-
PATH_REPO_ROOT, PATH_WORK_DIR, PATH_DOTENV, Today, logger = quantify.setup(
23-
__file__
23+
PATH_REPO_ROOT, PATH_WORK_DIR, PATH_DOTENV, DATETIME_TODAY, LOGGER = (
24+
quantify.setup(__file__)
2425
)
2526

2627
# Load environment variables
2728
load_dotenv(PATH_DOTENV)
2829

29-
# Set up file path for CSV report
30-
DATA_WRITE_FILE = (
31-
f"{PATH_WORK_DIR}"
32-
f"/data_deviantart_{Today.year}_{Today.month}_{Today.day}.csv"
33-
)
3430

3531
# Global Variable for API_KEYS indexing
3632
API_KEYS_IND = 0
@@ -39,6 +35,13 @@
3935
API_KEYS = os.getenv("GOOGLE_API_KEYS").split(",")
4036
PSE_KEY = os.getenv("PSE_KEY")
4137

38+
# Set up file path for CSV report
39+
DATA_WRITE_FILE = (
40+
f"{PATH_WORK_DIR}"
41+
f"/data_deviantart_"
42+
f"{DATETIME_TODAY.year}_{DATETIME_TODAY.month}_{DATETIME_TODAY.day}.csv"
43+
)
44+
4245

4346
def get_license_list():
4447
"""
@@ -84,7 +87,7 @@ def get_request_url(license):
8487
)
8588
except Exception as e:
8689
if isinstance(e, IndexError):
87-
logger.error("Depleted all API Keys provided")
90+
LOGGER.error("Depleted all API Keys provided")
8891
else:
8992
raise e
9093

@@ -124,7 +127,7 @@ def get_response_elems(license):
124127
# If quota limit exceeded, switch to the next API key
125128
global API_KEYS_IND
126129
API_KEYS_IND += 1
127-
logger.error("Changing API KEYS due to depletion of quota")
130+
LOGGER.error("Changing API KEYS due to depletion of quota")
128131
return get_response_elems(license)
129132
else:
130133
raise e
@@ -175,10 +178,11 @@ def main():
175178
try:
176179
main()
177180
except SystemExit as e:
181+
LOGGER.error("System exit with code: %d", e.code)
178182
sys.exit(e.code)
179183
except KeyboardInterrupt:
180-
logger.info("Halted via KeyboardInterrupt.")
184+
LOGGER.info("Halted via KeyboardInterrupt.")
181185
sys.exit(130)
182186
except Exception:
183-
logger.exception("Unhandled exception:")
187+
LOGGER.exception("Unhandled exception:")
184188
sys.exit(1)

flickr/data_cleaning.py

+12-5
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,19 @@
1313
"""
1414

1515
# Standard library
16+
import os
1617
import sys
17-
import traceback
1818

1919
# Third-party
2020
import pandas as pd
2121

22+
# First-party/Local
23+
import quantify
24+
25+
# Set up only LOGGER using quantify.setup()
26+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
27+
_, _, _, _, LOGGER = quantify.setup(__file__)
28+
2229

2330
def drop_empty_column(csv_path, new_csv_path):
2431
"""
@@ -93,11 +100,11 @@ def main():
93100
try:
94101
main()
95102
except SystemExit as e:
103+
LOGGER.error("System exit with code: %d", e.code)
96104
sys.exit(e.code)
97105
except KeyboardInterrupt:
98-
print("INFO (130) Halted via KeyboardInterrupt.", file=sys.stderr)
106+
LOGGER.info("Halted via KeyboardInterrupt.")
99107
sys.exit(130)
100108
except Exception:
101-
print("ERROR (1) Unhandled exception:", file=sys.stderr)
102-
print(traceback.print_exc(), file=sys.stderr)
103-
sys.exit(1)
109+
LOGGER.exception("Unhandled exception:")
110+
sys.exit(1)

flickr/photos.py

+15-11
Original file line numberDiff line numberDiff line change
@@ -6,19 +6,23 @@
66
# Standard library
77
import json
88
import os
9-
import os.path
109
import sys
11-
import traceback
1210

1311
# Third-party
1412
import flickrapi
1513
from dotenv import load_dotenv
1614

17-
# Get the current working directory
18-
CWD = os.path.dirname(os.path.abspath(__file__))
15+
# First-party/Local
16+
import quantify
17+
18+
# Set up paths, date, and LOGGER using quantify.setup()
19+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
20+
PATH_REPO_ROOT, PATH_WORK_DIR, PATH_DOTENV, DATETIME_TODAY, LOGGER = (
21+
quantify.setup(__file__)
22+
)
23+
1924
# Load environment variables
20-
dotenv_path = os.path.join(os.path.dirname(CWD), ".env")
21-
load_dotenv(dotenv_path)
25+
load_dotenv(PATH_DOTENV)
2226

2327

2428
def main():
@@ -37,19 +41,19 @@ def main():
3741
photosJson = flickr.photos.search(license=i, per_page=500)
3842
dic[i] = [json.loads(photosJson.decode("utf-8"))]
3943
# Save the dictionary containing photo data to a JSON file
40-
with open(os.path.join(CWD, "photos.json"), "w") as json_file:
44+
with open(os.path.join(PATH_WORK_DIR, "photos.json"), "w") as json_file:
4145
json.dump(dic, json_file)
4246

4347

4448
if __name__ == "__main__":
4549
try:
4650
main()
4751
except SystemExit as e:
52+
LOGGER.error("System exit with code: %d", e.code)
4853
sys.exit(e.code)
4954
except KeyboardInterrupt:
50-
print("INFO (130) Halted via KeyboardInterrupt.", file=sys.stderr)
55+
LOGGER.info("Halted via KeyboardInterrupt.")
5156
sys.exit(130)
5257
except Exception:
53-
print("ERROR (1) Unhandled exception:", file=sys.stderr)
54-
print(traceback.print_exc(), file=sys.stderr)
55-
sys.exit(1)
58+
LOGGER.exception("Unhandled exception:")
59+
sys.exit(1)

flickr/photos_detail.py

+18-13
Original file line numberDiff line numberDiff line change
@@ -10,21 +10,25 @@
1010
# Standard library
1111
import json
1212
import os
13-
import os.path
1413
import sys
1514
import time
16-
import traceback
1715

1816
# Third-party
1917
import flickrapi
2018
import pandas as pd
2119
from dotenv import load_dotenv
2220

23-
# Set up current working directory
24-
CWD = os.path.dirname(os.path.abspath(__file__))
21+
# First-party/Local
22+
import quantify
23+
24+
# Set up paths, date, and LOGGER using quantify.setup()
25+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
26+
PATH_REPO_ROOT, PATH_WORK_DIR, PATH_DOTENV, DATETIME_TODAY, LOGGER = (
27+
quantify.setup(__file__)
28+
)
29+
2530
# Load environment variables
26-
dotenv_path = os.path.join(os.path.dirname(CWD), ".env")
27-
load_dotenv(dotenv_path)
31+
load_dotenv(PATH_DOTENV)
2832

2933
# Global variable: Number of retries for error handling
3034
RETRIES = 0
@@ -188,9 +192,9 @@ def page1_reset(final_csv, raw_data):
188192

189193

190194
def main():
191-
final_csv_path = os.path.join(CWD, "final.csv")
192-
record_txt_path = os.path.join(CWD, "rec.txt")
193-
hs_csv_path = os.path.join(CWD, "hs.csv")
195+
final_csv_path = os.path.join(PATH_WORK_DIR, "final.csv")
196+
record_txt_path = os.path.join(PATH_WORK_DIR, "rec.txt")
197+
hs_csv_path = os.path.join(PATH_WORK_DIR, "hs.csv")
194198

195199
# Initialize Flickr API instance
196200
flickr = flickrapi.FlickrAPI(
@@ -290,7 +294,7 @@ def main():
290294
# If reached max limit of pages, reset j to 1 and
291295
# update i to the license in the dictionary
292296
if j == total + 1 or j > total:
293-
license_i_path = os.path.join(CWD, f"license{i}.csv")
297+
license_i_path = os.path.join(PATH_WORK_DIR, f"license{i}.csv")
294298
clean_saveas_csv(final_csv_path, license_i_path)
295299
i += 1
296300
j = 1
@@ -305,18 +309,19 @@ def main():
305309

306310

307311
if __name__ == "__main__":
312+
RETRIES = 0 # Initialize RETRIES counter
308313
while True:
309314
try:
310315
main()
311316
except SystemExit as e:
317+
LOGGER.error("System exit with code: %d", e.code)
312318
sys.exit(e.code)
313319
except KeyboardInterrupt:
314-
print("INFO (130) Halted via KeyboardInterrupt.", file=sys.stderr)
320+
LOGGER.info("Halted via KeyboardInterrupt.")
315321
sys.exit(130)
316322
except Exception:
317323
RETRIES += 1
318-
print("ERROR (1) Unhandled exception:", file=sys.stderr)
319-
print(traceback.print_exc(), file=sys.stderr)
324+
LOGGER.exception("Unhandled exception:")
320325
if RETRIES <= 20:
321326
continue
322327
else:

0 commit comments

Comments
 (0)