|
| 1 | +# Standard library |
| 2 | +import os |
| 3 | +import os.path |
| 4 | +import sys |
| 5 | +import traceback |
| 6 | + |
| 7 | +# Third-party |
| 8 | +import requests |
| 9 | +from requests.adapters import HTTPAdapter |
| 10 | +from urllib3.util.retry import Retry |
| 11 | + |
| 12 | +sys.path.append(".") |
| 13 | +# First-party/Local |
| 14 | +import quantify # noqa: E402 |
| 15 | + |
# Shared project paths and the current date, as provided by the local
# quantify helper module for this script file.
PATH_REPO_ROOT, PATH_WORK_DIR, PATH_DOTENV, DATETIME_TODAY = quantify.setup(
    __file__
)

# CSV output file for this run: lives in PATH_WORK_DIR and is named after the
# year, month and day of DATETIME_TODAY (default formatting, no explicit
# zero-padding). Only the second fragment needs the f-prefix.
DATA_WRITE_FILE = os.path.join(
    PATH_WORK_DIR,
    "data_github_"
    f"{DATETIME_TODAY.year}_{DATETIME_TODAY.month}_{DATETIME_TODAY.day}.csv",
)
| 25 | + |
| 26 | + |
def set_up_data_file():
    """Create (or truncate) DATA_WRITE_FILE and write the CSV header row."""
    with open(DATA_WRITE_FILE, "w") as data_file:
        # print() appends the trailing newline for the header line.
        print("LICENSE_TYPE,Repository Count", file=data_file)
| 32 | + |
| 33 | + |
def get_response_elems(license, *, timeout=30):
    """Query the GitHub repository search API for a given license.

    Args:
        license:
            A string naming the license to search for. It is used as the
            value of the ``license:`` qualifier in the search query. (The
            parameter name shadows the ``license`` builtin; it is kept so
            existing callers continue to work.)
        timeout:
            Seconds to wait on the server for a single attempt before
            giving up. Passed straight through to ``requests``.

    Returns:
        dict: A dictionary with the single key ``"totalResults"`` holding
        the ``total_count`` value from the API response.

    Raises:
        requests.exceptions.RequestException: If the request fails, times
            out, or ends with an error status.
        KeyError: If the response JSON has no ``total_count`` field.
    """
    search_url = "https://api.github.com/search/repositories"
    retry_policy = Retry(
        total=5,
        backoff_factor=10,
        status_forcelist=[403, 408, 429, 500, 502, 503, 504],
    )
    # Use the session as a context manager so its connection pool is
    # released even when the request raises.
    with requests.Session() as session:
        session.mount("https://", HTTPAdapter(max_retries=retry_policy))
        # Let requests build the query string so the license value is
        # URL-encoded rather than spliced into the URL verbatim.
        with session.get(
            search_url,
            params={"q": f"license:{license}"},
            timeout=timeout,
        ) as response:
            response.raise_for_status()
            search_data = response.json()
    return {"totalResults": search_data["total_count"]}
| 62 | + |
| 63 | + |
def record_license_data(license_type):
    """Append one CSV row with the GitHub repository count for a license.

    Args:
        license_type:
            A string naming the license. It is forwarded unchanged to
            get_response_elems() and written as the first column of the
            row appended to DATA_WRITE_FILE.
    """
    # Fetch the count first so the file is not opened if the query fails.
    repository_count = get_response_elems(license_type)["totalResults"]
    with open(DATA_WRITE_FILE, "a") as data_file:
        data_file.write(f"{license_type},{repository_count}\n")
| 79 | + |
| 80 | + |
def record_all_licenses():
    """Record a row in DATA_WRITE_FILE for each license in the built-in
    list, querying them one at a time in list order.
    """
    for license_name in ("CC0-1.0", "CC-BY-4.0", "CC-BY-SA-4.0"):
        record_license_data(license_name)
| 88 | + |
| 89 | + |
def main():
    """Run the full collection: write the header, then one row per license."""
    # The header must be written first because it truncates the data file.
    set_up_data_file()
    record_all_licenses()
| 93 | + |
| 94 | + |
if __name__ == "__main__":
    # Script entry point: translate each way main() can stop into an exit
    # status and a message on stderr.
    try:
        main()
    except SystemExit as e:
        # Pass an explicit exit request through with its original code.
        sys.exit(e.code)
    except KeyboardInterrupt:
        print("INFO (130) Halted via KeyboardInterrupt.", file=sys.stderr)
        sys.exit(130)
    except Exception:
        print("ERROR (1) Unhandled exception:", file=sys.stderr)
        # print_exc() writes the traceback itself and returns None, so it
        # must not be wrapped in print() (that emitted a stray "None").
        traceback.print_exc(file=sys.stderr)
        sys.exit(1)
0 commit comments