|
13 | 13 | import traceback |
14 | 14 | import urllib.parse |
15 | 15 | from collections import Counter, defaultdict |
| 16 | +from operator import itemgetter |
16 | 17 |
|
17 | 18 | # Third-party |
18 | 19 | import feedparser |
@@ -455,60 +456,74 @@ def save_count_data( |
455 | 456 | # author_counts: {license: {author_count(int|None): count}} |
456 | 457 |
|
457 | 458 | # Save license counts |
| 459 | + data = [] |
| 460 | + for lic, c in license_counts.items(): |
| 461 | + data.append({"TOOL_IDENTIFIER": lic, "COUNT": c}) |
| 462 | + data.sort(key=itemgetter("TOOL_IDENTIFIER")) |
458 | 463 | with open(FILE_ARXIV_COUNT, "w", encoding="utf-8", newline="\n") as fh: |
459 | 464 | writer = csv.DictWriter(fh, fieldnames=HEADER_COUNT, dialect="unix") |
460 | 465 | writer.writeheader() |
461 | | - for lic, c in license_counts.items(): |
462 | | - writer.writerow({"TOOL_IDENTIFIER": lic, "COUNT": c}) |
| 466 | + for row in data: |
| 467 | + writer.writerow(row) |
463 | 468 |
|
464 | 469 | # Save category report with labels |
| 470 | + data = [] |
| 471 | + for lic, cats in category_counts.items(): |
| 472 | + for code, c in cats.items(): |
| 473 | + label = CATEGORIES.get(code, code) |
| 474 | + data.append( |
| 475 | + { |
| 476 | + "TOOL_IDENTIFIER": lic, |
| 477 | + "CATEGORY_CODE": code, |
| 478 | + "CATEGORY_LABEL": label, |
| 479 | + "COUNT": c, |
| 480 | + } |
| 481 | + ) |
| 482 | + data.sort(key=itemgetter("TOOL_IDENTIFIER", "CATEGORY_CODE")) |
465 | 483 | with open( |
466 | 484 | FILE_ARXIV_CATEGORY_REPORT, "w", encoding="utf-8", newline="\n" |
467 | 485 | ) as fh: |
468 | 486 | writer = csv.DictWriter( |
469 | 487 | fh, fieldnames=HEADER_CATEGORY_REPORT, dialect="unix" |
470 | 488 | ) |
471 | 489 | writer.writeheader() |
472 | | - for lic, cats in category_counts.items(): |
473 | | - for code, c in cats.items(): |
474 | | - label = CATEGORIES.get(code, code) |
475 | | - writer.writerow( |
476 | | - { |
477 | | - "TOOL_IDENTIFIER": lic, |
478 | | - "CATEGORY_CODE": code, |
479 | | - "CATEGORY_LABEL": label, |
480 | | - "COUNT": c, |
481 | | - } |
482 | | - ) |
| 490 | + for row in data: |
| 491 | + writer.writerow(row) |
483 | 492 |
|
484 | 493 | # Save year counts |
| 494 | + data = [] |
| 495 | + for lic, years in year_counts.items(): |
| 496 | + for year, c in years.items(): |
| 497 | + data.append({"TOOL_IDENTIFIER": lic, "YEAR": year, "COUNT": c}) |
| 498 | + data.sort(key=itemgetter("TOOL_IDENTIFIER", "YEAR")) |
485 | 499 | with open(FILE_ARXIV_YEAR, "w", encoding="utf-8", newline="\n") as fh: |
486 | 500 | writer = csv.DictWriter(fh, fieldnames=HEADER_YEAR, dialect="unix") |
487 | 501 | writer.writeheader() |
488 | | - for lic, years in year_counts.items(): |
489 | | - for year, c in years.items(): |
490 | | - writer.writerow( |
491 | | - {"TOOL_IDENTIFIER": lic, "YEAR": year, "COUNT": c} |
492 | | - ) |
| 502 | + for row in data: |
| 503 | + writer.writerow(row) |
493 | 504 |
|
494 | 505 | # Save author buckets summary |
| 506 | + data = [] |
| 507 | + for lic, acs in author_counts.items(): |
| 508 | + # build buckets across licenses |
| 509 | + bucket_counts = Counter() |
| 510 | + for ac, c in acs.items(): |
| 511 | + b = bucket_author_count(ac) |
| 512 | + bucket_counts[b] += c |
| 513 | + for b, c in bucket_counts.items(): |
| 514 | + data.append( |
| 515 | + {"TOOL_IDENTIFIER": lic, "AUTHOR_BUCKET": b, "COUNT": c} |
| 516 | + ) |
| 517 | + data.sort(key=itemgetter("TOOL_IDENTIFIER", "AUTHOR_BUCKET")) |
495 | 518 | with open( |
496 | 519 | FILE_ARXIV_AUTHOR_BUCKET, "w", encoding="utf-8", newline="\n" |
497 | 520 | ) as fh: |
498 | 521 | writer = csv.DictWriter( |
499 | 522 | fh, fieldnames=HEADER_AUTHOR_BUCKET, dialect="unix" |
500 | 523 | ) |
501 | 524 | writer.writeheader() |
502 | | - # build buckets across licenses |
503 | | - for lic, acs in author_counts.items(): |
504 | | - bucket_counts = Counter() |
505 | | - for ac, c in acs.items(): |
506 | | - b = bucket_author_count(ac) |
507 | | - bucket_counts[b] += c |
508 | | - for b, c in bucket_counts.items(): |
509 | | - writer.writerow( |
510 | | - {"TOOL_IDENTIFIER": lic, "AUTHOR_BUCKET": b, "COUNT": c} |
511 | | - ) |
| 525 | + for row in data: |
| 526 | + writer.writerow(row) |
512 | 527 |
|
513 | 528 |
|
514 | 529 | def query_arxiv(args): |
|
0 commit comments