Skip to content

Integrate Python logging module into all .py files of Quantifying + README Update for Logging #97

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 40 commits into from
Apr 1, 2024
Merged
Changes from 1 commit
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
a7ba26d
Update README.md with Python Logging resource
naishasinha Mar 18, 2024
0eaa594
Integrate logging module into deviantart_scratcher.py
naishasinha Mar 18, 2024
32da947
Integrate logging module into data_analysis.py
naishasinha Mar 18, 2024
dba6b11
Integrate logging module into data_cleaning.py
naishasinha Mar 18, 2024
efc8c76
Integrate logging module into photos_detail.py
naishasinha Mar 18, 2024
c4d1140
Integrate logging module into photos.py
naishasinha Mar 18, 2024
ace1987
Integrate logging module into google_scratcher.py
naishasinha Mar 18, 2024
b69bc55
Integrate logging into internetarchive_scratcher.py
naishasinha Mar 18, 2024
e86c66e
Integrate logging module into metmuseum_scratcher.py
naishasinha Mar 18, 2024
085a3cd
Integrate logging module into vimeo_scratcher.py
naishasinha Mar 18, 2024
bfb3ce9
Integrate logging module into wikicommons_scratcher.py
naishasinha Mar 18, 2024
2b28549
Integrate logging module into wikipedia_scratcher.py
naishasinha Mar 18, 2024
093b281
Integrate logging module into youtube_scratcher.py
naishasinha Mar 18, 2024
07dcb28
Update deviantart_scratcher.py with correct logging statement
naishasinha Mar 20, 2024
e0e16d4
Update data_cleaning.py with correct logging statement
naishasinha Mar 20, 2024
eb966af
Update photos.py with correct logging statement
naishasinha Mar 20, 2024
55f597a
Update photos_detail.py with correct logging statement
naishasinha Mar 20, 2024
794db36
Update data_analysis.py with correct logging statements
naishasinha Mar 20, 2024
46582f1
Update deviantart_scratcher.py with correct logging
naishasinha Mar 21, 2024
d54fe4a
Update data_cleaning.py with correct logging
naishasinha Mar 21, 2024
4966b31
Update photos.py with correct logging
naishasinha Mar 21, 2024
b5cb64f
Update photos_detail.py with correct logging
naishasinha Mar 21, 2024
29dc272
Update google_scratcher.py with correct logging
naishasinha Mar 21, 2024
361e684
Update internetarchive_scratcher.py with correct logging
naishasinha Mar 21, 2024
834f3a4
Update metmuseum_scratcher.py with correct logging
naishasinha Mar 21, 2024
2f07873
Update vimeo_scratcher.py with correct logging
naishasinha Mar 21, 2024
788190c
Update wikicommons_scratcher.py with correct logging statements
naishasinha Mar 21, 2024
eb1853f
Update wikipedia_scratcher.py with correct logging statements
naishasinha Mar 21, 2024
8c099ec
Update youtube_scratcher.py with correct logging statements
naishasinha Mar 21, 2024
3b0d668
Merge branch 'main' into main
TimidRobot Mar 26, 2024
d25b2b4
Updated deviantart_scratcher.py with f-string
naishasinha Mar 26, 2024
79d29e5
Imported traceback to all files, fixing 'undefined traceback' error. …
naishasinha Mar 30, 2024
739068a
Resolve merge conflicts
naishasinha Mar 30, 2024
c70e000
Merge branch 'main' into main
naishasinha Mar 30, 2024
14536f8
Update deviantart_scratcher.py with removed datetime import
naishasinha Apr 1, 2024
cdcb8db
Update metmuseum_scratcher.py - removed datetime import
naishasinha Apr 1, 2024
42f9d3c
Update vimeo_scratcher.py - removed datetime import
naishasinha Apr 1, 2024
9467718
Update wikicommons_scratcher.py - removed datetime import
naishasinha Apr 1, 2024
29bd439
Update youtube_scratcher.py - removed datetime import
naishasinha Apr 1, 2024
2d58fb2
Fixed pre-commit whitespace issues
naishasinha Apr 1, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Integrate logging module into wikipedia_scratcher.py
  • Loading branch information
naishasinha authored Mar 18, 2024
commit 2b28549339ffcab262c922f2c07a1f7577734337
44 changes: 35 additions & 9 deletions wikipedia/wikipedia_scratcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import datetime as dt
import os
import sys
import traceback
import logging

# Third-party
import pandas as pd
Expand All @@ -21,6 +21,22 @@
f"{CWD}" f"/data_wikipedia_{today.year}_{today.month}_{today.day}.csv"
)

# Set up the module-level logger. Everything below runs at import time.
LOG = logging.getLogger(__name__)
LOG.setLevel(logging.INFO)

# Define both the handler and the formatter. StreamHandler() is created
# without a stream argument, so it writes to its default stream (sys.stderr).
handler = logging.StreamHandler()
formatter = logging.Formatter(
    "%(asctime)s - %(levelname)s - %(name)s - %(message)s"
)

# Add formatter to the handler
handler.setFormatter(formatter)

# Add handler to the logger only when none is attached yet.
# logging.getLogger() returns the same logger object for the same name, so
# executing this module body again (e.g. importlib.reload) would otherwise
# stack another handler and emit every record twice.
if not LOG.handlers:
    LOG.addHandler(handler)

# Log the start of the script execution
LOG.info("Script execution started.")

def get_wiki_langs():
"""
Expand All @@ -35,6 +51,7 @@ def get_wiki_langs():
- pd.DataFrame: A Dataframe containing information of each Wikipedia
language and its respective encoding on web address.
"""
LOG.info("Providing the list of language to find Creative Commons usage data on.")
return pd.read_csv(f"{CWD}/language-codes_csv.csv")


Expand All @@ -50,6 +67,8 @@ def get_request_url(lang="en"):
- string: A string representing the API Endpoint URL for the query
specified by this function's parameters.
"""
LOG.info("Providing the API Endpoint URL for specified parameter combinations.")

base_url = (
r"wikipedia.org/w/api.php?action=query&meta=siteinfo&siprop=statistics"
r"&format=json"
Expand All @@ -70,6 +89,8 @@ def get_response_elems(language="en"):
- dict: A dictionary mapping metadata to its value provided from the API
query of specified parameters.
"""
LOG.info("Providing the metadata for query of specified parameters")

search_data = None
try:
request_url = get_request_url(language)
Expand All @@ -88,22 +109,22 @@ def get_response_elems(language="en"):
return search_data_dict
except Exception as e:
if search_data is None:
print(
LOG.exception(
f"Received Result is None due to Language {language} absent as"
"an available Wikipedia client. Will therefore return an empty"
"dictionary for result, but will continue querying.",
file=sys.stderr,
)
"dictionary for result, but will continue querying.")
return {}
elif "query" not in search_data:
print(f"search data is: \n{search_data}", file=sys.stderr)
LOG.exception(f"search data is: \n{search_data}")
sys.exit(1)
else:
raise e


def set_up_data_file():
"""Writes the header row to file to contain Wikipedia Query data."""
LOG.info("Writing the header row to file to contain Wikipedia Query data.")

header_title = ",".join(get_response_elems())
with open(DATA_WRITE_FILE, "w") as f:
f.write(f"{header_title}\n")
Expand All @@ -116,6 +137,8 @@ def record_lang_data(lang="en"):
- lang: A string representing the language that the search results are
presented in. Alternatively, the default value is by Wikipedia customs "en"
"""
LOG.info("Writing the row for LICENSE_TYPE to file to contain Google Query data.")

response = get_response_elems(lang)
if response != {}:
response_values = response.values()
Expand All @@ -128,6 +151,8 @@ def record_all_licenses():
"""Records the data of all language types findable in the language list and
records these data into the DATA_WRITE_FILE as specified in that constant.
"""
LOG.info("Recording the data of all language types findable in the language list and recording into DATA_WRITE_FILE")

wiki_langs = get_wiki_langs()
for iso_language_code in wiki_langs["alpha2"]:
record_lang_data(iso_language_code)
Expand All @@ -141,6 +166,7 @@ def get_current_data():
- pd.DataFrame: A DataFrame recording the number of CC-licensed documents
per search query of assumption.
"""
LOG.info("Returning a DataFrame for the Creative Commons usage data collected")
return pd.read_csv(DATA_WRITE_FILE).set_index("language")


Expand All @@ -153,11 +179,11 @@ def main():
try:
main()
except SystemExit as e:
LOG.error("System exit with code: %d", e.code)
sys.exit(e.code)
except KeyboardInterrupt:
print("INFO (130) Halted via KeyboardInterrupt.", file=sys.stderr)
LOG.info("Halted via KeyboardInterrupt.")
sys.exit(130)
except Exception:
print("ERROR (1) Unhandled exception:", file=sys.stderr)
print(traceback.print_exc(), file=sys.stderr)
LOG.exception("Unhandled exception occurred during script execution:")
sys.exit(1)