|
| 1 | +#!/usr/bin/env python |
| 2 | +""" |
| 3 | +Fetch CC photo license data from Flickr API for quarterly analysis. |
| 4 | +""" |
| 5 | + |
| 6 | +# Standard library |
| 7 | +import argparse |
| 8 | +import csv |
| 9 | +import json |
| 10 | +import os |
| 11 | +import sys |
| 12 | +import time |
| 13 | +import traceback |
| 14 | + |
| 15 | +# Third-party |
| 16 | +import flickrapi |
| 17 | +from dotenv import load_dotenv |
| 18 | +from pygments import highlight |
| 19 | +from pygments.formatters import TerminalFormatter |
| 20 | +from pygments.lexers import PythonTracebackLexer |
| 21 | + |
| 22 | +sys.path.append(os.path.join(os.path.dirname(__file__), "..")) |
| 23 | +# First-party/Local |
| 24 | +import shared # noqa: E402 |
| 25 | + |
# Logger and path lookup table are both supplied by the shared helper module
LOGGER, PATHS = shared.setup(__file__)
# Load the .env file before the credentials are read from the environment
load_dotenv(PATHS["dotenv"])

# Flickr credentials (each is None when the variable is not set)
FLICKR_API_KEY = os.getenv("FLICKR_API_KEY")
FLICKR_API_SECRET = os.getenv("FLICKR_API_SECRET")

# Output CSV path and its column order
FILE1_COUNT = os.path.join(PATHS["data_phase"], "flickr_1_count.csv")
HEADER1_COUNT = ["LICENSE_ID", "LICENSE_NAME", "COUNT"]

# Last path component of the quarter directory; used in the commit message
QUARTER = os.path.basename(PATHS["data_quarter"])

# flickr.photos.licenses.getInfo API
FLICKR_LICENSES = {
    1: "CC BY-NC-SA 2.0",
    2: "CC BY-NC 2.0",
    3: "CC BY-NC-ND 2.0",
    4: "CC BY 2.0",
    5: "CC BY-SA 2.0",
    6: "CC BY-ND 2.0",
    9: "Public Domain Dedication (CC0)",
    10: "Public Domain Mark",
}

# License ids to query, in the order they appear in FLICKR_LICENSES
CC_LICENSES = list(FLICKR_LICENSES)

LOGGER.info("Script execution started.")
| 50 | + |
| 51 | + |
def parse_arguments():
    """
    Build the command-line parser and return the parsed options.

    Reports an error through the parser (which exits the process) when
    --enable-git is given without --enable-save.
    """
    LOGGER.info("Parsing command-line options")
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument(
        "--limit",
        type=int,
        default=100,
        help="Limit number of photos per license (default: 100)",
    )
    # The remaining options are plain on/off switches; they are registered
    # in this order so the generated help text lists them the same way.
    switches = (
        ("--enable-save", "Enable saving results"),
        (
            "--enable-git",
            "Enable git actions (fetch, merge, add, commit, and push)",
        ),
        ("--dev", "Development mode: generate fake data without API calls"),
    )
    for flag, help_text in switches:
        cli.add_argument(flag, action="store_true", help=help_text)

    options = cli.parse_args()
    if options.enable_git and not options.enable_save:
        cli.error("--enable-git requires --enable-save")
    return options
| 80 | + |
| 81 | + |
def get_flickr_api():
    """
    Return a FlickrAPI client built from the module-level credentials.

    Raises:
        shared.QuantifyingException: if FLICKR_API_KEY or FLICKR_API_SECRET
            is unset or empty.
    """
    LOGGER.info("Setting up Flickr API")

    have_credentials = bool(FLICKR_API_KEY and FLICKR_API_SECRET)
    if not have_credentials:
        raise shared.QuantifyingException(
            "Missing Flickr API credentials. Check your .env file."
        )

    # Responses are requested in the "json" format; fetch_license_count()
    # decodes and parses them itself.
    client = flickrapi.FlickrAPI(
        FLICKR_API_KEY,
        FLICKR_API_SECRET,
        format="json",
    )
    return client
| 95 | + |
| 96 | + |
def fetch_license_count(flickr, license_id, limit=100):
    """
    Fetch the photo count for a specific license from the Flickr API.

    Args:
        flickr: client object exposing photos.search(); main() passes the
            object returned by get_flickr_api().
        license_id: Flickr license identifier used as the search filter.
        limit: upper bound applied to the returned count.

    Returns:
        The total reported by the API, capped at limit and never negative.
        Returns 0 when the response carries no total or when the request
        fails; failures are logged instead of raised so that the remaining
        licenses can still be processed.
    """
    license_name = FLICKR_LICENSES.get(license_id, "Unknown")
    LOGGER.info(f"Fetching count for license {license_id}: {license_name}")

    try:
        # Only the "total" field of the response is read, so request a
        # single photo per page instead of up to `limit` records that
        # would be thrown away. This also avoids sending per_page <= 0
        # when a non-positive limit is supplied.
        response = flickr.photos.search(
            license=license_id, per_page=1, page=1
        )

        # Accept an already-parsed mapping, raw bytes, or text
        if isinstance(response, dict):
            photos_data = response
        else:
            if isinstance(response, (bytes, bytearray)):
                response = response.decode("utf-8")
            photos_data = json.loads(response)

        if "photos" in photos_data and "total" in photos_data["photos"]:
            total = int(photos_data["photos"]["total"])
            # Clamp at zero so a negative limit cannot produce a negative
            # count in the output
            count = max(0, min(total, limit))
            LOGGER.info(f"  Found {count} photos (total available: {total})")
            return count
        else:
            LOGGER.warning(f"  No data returned for license {license_id}")
            return 0

    except Exception as e:
        # Deliberate best-effort: log and report zero for this license
        LOGGER.error(f"  Failed to fetch count for license {license_id}: {e}")
        return 0
| 121 | + |
| 122 | + |
def generate_fake_data(args):
    """
    Build deterministic placeholder counts for dev mode (no API calls).

    Each license gets an equal integer share of args.limit plus offsets
    derived from its id and its position in CC_LICENSES.
    """
    LOGGER.info("Creating fake data for dev mode")

    per_license = args.limit // len(CC_LICENSES)
    return {
        license_id: per_license + (license_id * 10) + (position * 5)
        for position, license_id in enumerate(CC_LICENSES)
    }
| 133 | + |
| 134 | + |
def save_data(args, license_counts):
    """
    Save license count data to the CSV file at FILE1_COUNT.

    Args:
        args: parsed command-line options; nothing is written unless
            args.enable_save is true.
        license_counts: mapping of license id to photo count.

    Rows are written in ascending license id order under the HEADER1_COUNT
    columns. A license id missing from FLICKR_LICENSES is written with the
    name "Unknown", matching fetch_license_count().
    """
    if not args.enable_save:
        LOGGER.info("Save disabled, skipping file write")
        return

    LOGGER.info(f"Writing data to {FILE1_COUNT}")

    # open() does not create missing parent directories, so make sure the
    # output directory exists before writing
    output_dir = os.path.dirname(FILE1_COUNT)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Dictionary keys are unique, so sorting the items orders by license id
    data_rows = [
        {
            "LICENSE_ID": license_id,
            "LICENSE_NAME": FLICKR_LICENSES.get(license_id, "Unknown"),
            "COUNT": count,
        }
        for license_id, count in sorted(license_counts.items())
    ]

    with open(FILE1_COUNT, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=HEADER1_COUNT)
        writer.writeheader()
        writer.writerows(data_rows)

    LOGGER.info(f"Successfully wrote {len(data_rows)} records")
| 161 | + |
| 162 | + |
def main():
    """
    Run the script: parse options, collect counts, save, and report.

    With --enable-git the repository is fetched and merged before the data
    is collected, and the quarter's data is committed and pushed after it
    is saved. Any handled error is logged and ends the process with exit
    status 1.
    """
    try:
        args = parse_arguments()

        if args.enable_git:
            shared.git_fetch_and_merge(args, PATHS["repo"])

        if args.dev:
            license_counts = generate_fake_data(args)
        else:
            license_counts = {}
            flickr = get_flickr_api()
            for license_id in CC_LICENSES:
                license_counts[license_id] = fetch_license_count(
                    flickr, license_id, args.limit
                )
                # Short pause between consecutive API requests
                time.sleep(0.1)

        save_data(args, license_counts)

        if args.enable_git:
            commit_message = f"Add Flickr data for {QUARTER}"
            args = shared.git_add_and_commit(
                args,
                PATHS["repo"],
                PATHS["data_quarter"],
                commit_message,
            )
            shared.git_push_changes(args, PATHS["repo"])

        total_photos = sum(license_counts.values())
        LOGGER.info(f"Done. Total photos across all licenses: {total_photos}")

        # Per-license summary in ascending license id order
        for license_id, count in sorted(license_counts.items()):
            license_name = FLICKR_LICENSES[license_id]
            LOGGER.info(
                f"  License {license_id} ({license_name}): {count} photos"
            )

    except shared.QuantifyingException as err:
        LOGGER.error(f"Error: {err}")
        sys.exit(1)
    except Exception as err:
        LOGGER.error(f"Unexpected error: {err}")
        # 10 is the numeric value of logging.DEBUG
        if LOGGER.isEnabledFor(10):
            print(
                highlight(
                    traceback.format_exc(),
                    PythonTracebackLexer(),
                    TerminalFormatter(),
                )
            )
        sys.exit(1)
| 215 | + |
| 216 | + |
| 217 | +if __name__ == "__main__": |
| 218 | + main() |
0 commit comments