Skip to content

Commit ef56531

Browse files
committed
Added script that generates projects page.
1 parent fc40d55 commit ef56531

File tree

3 files changed

+161
-0
lines changed

3 files changed

+161
-0
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,4 @@ Please see the individual README files in the folder for each script for informa
88
| Script name | Script purpose |
99
|----------------------|------------------------------------------------------------------------|
1010
| `normalize_repos` | Ensures that all CC repos have standard labels and branch protections. |
11+
| `generate_projects_page` | Generates the ["Projects" page on CC Open Source](https://opensource.creativecommons.org/contributing-code/projects/). |

generate_projects_page/README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Generate Projects Page
2+
3+
This script is run on AWS Lambda and generates the [Projects page on CC Open Source](https://opensource.creativecommons.org/contributing-code/projects/).
4+
5+
## Running the Script
6+
7+
This script needs to be manually synced to AWS Lambda and run there. You cannot run it locally.
Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
import datetime
2+
import json
3+
import os
4+
from base64 import b64decode
5+
6+
import boto3
7+
import git
8+
import yaml
9+
from github import Github
10+
from github.GithubException import UnknownObjectException
11+
12+
13+
# Author/committer identity for the commits created by this script.
GIT_USER_NAME = 'CC creativecommons.github.io Bot'
GIT_USER_EMAIL = 'cc-creativecommons-github-io-bot@creativecommons.org'

# GitHub account, organization and repository the script works against.
GITHUB_USERNAME = 'cc-creativecommons-github-io-bot'
GITHUB_ORGANIZATION = 'creativecommons'
GITHUB_REPO_NAME = 'creativecommons.github.io-source'
# The GITHUB_TOKEN environment variable holds the base64-encoded,
# KMS-encrypted token.
ENCRYPTED_GITHUB_TOKEN = os.environ['GITHUB_TOKEN']
# Decryption is done at module level, outside of the function handler, so
# that it runs once per container instead of once per invocation.
DECRYPTED_GITHUB_TOKEN = (
    boto3.client('kms')
    .decrypt(CiphertextBlob=b64decode(ENCRYPTED_GITHUB_TOKEN))['Plaintext']
    .decode('ascii')
)
# NOTE: this URL embeds the decrypted token; never log or print it.
GITHUB_REPO_URL_WITH_CREDENTIALS = f'https://{GITHUB_USERNAME}:{DECRYPTED_GITHUB_TOKEN}@github.com/{GITHUB_ORGANIZATION}/{GITHUB_REPO_NAME}.git'
# Per-repository metadata file read by get_repo_cc_metadata().
CC_METADATA_FILE_NAME = '.cc-metadata.yml'

# Local paths: the clone lives under the Lambda working directory and the
# generated JSON goes into the clone's "databags" folder.
LAMBDA_WORKING_DIRECTORY = '/tmp'
GIT_WORKING_DIRECTORY = f'{LAMBDA_WORKING_DIRECTORY}/{GITHUB_REPO_NAME}'
JSON_FILE_DIRECTORY = f'{GIT_WORKING_DIRECTORY}/databags'
29+
30+
31+
def set_up_repo():
    """Ensure an up-to-date local clone of the site source repository.

    Clones the repository into ``LAMBDA_WORKING_DIRECTORY`` when no clone is
    present yet, then pulls the latest changes.

    Returns:
        str: Path of the local clone (``GIT_WORKING_DIRECTORY``).
    """
    # Lambda seems to reuse resources sometimes, in which case the clone from
    # an earlier invocation already exists and only needs to be refreshed.
    if not os.path.isdir(GIT_WORKING_DIRECTORY):
        # Pass cwd explicitly so the clone is guaranteed to land in the
        # directory the check above (and the pull below) look at, instead of
        # depending on exec_command's default working directory.
        git.exec_command(
            'clone',
            GITHUB_REPO_URL_WITH_CREDENTIALS,
            cwd=LAMBDA_WORKING_DIRECTORY,
        )
    git.exec_command(
        'pull',
        GITHUB_REPO_URL_WITH_CREDENTIALS,
        cwd=GIT_WORKING_DIRECTORY,
    )
    # Same value the original rebuilt by hand; using the constant keeps the
    # returned path and the path operated on from drifting apart.
    return GIT_WORKING_DIRECTORY
37+
38+
39+
def set_up_git_user():
    """Set the Git author and committer identity via environment variables.

    Writes ``GIT_USER_NAME`` / ``GIT_USER_EMAIL`` into the process
    environment under the ``GIT_AUTHOR_*`` and ``GIT_COMMITTER_*`` names.
    """
    os.environ.update({
        'GIT_AUTHOR_NAME': GIT_USER_NAME,
        'GIT_AUTHOR_EMAIL': GIT_USER_EMAIL,
        'GIT_COMMITTER_NAME': GIT_USER_NAME,
        'GIT_COMMITTER_EMAIL': GIT_USER_EMAIL,
    })
44+
45+
46+
def set_up_github_client():
    """Return a ``Github`` client authenticated with the decrypted token."""
    return Github(DECRYPTED_GITHUB_TOKEN)
49+
50+
51+
def get_cc_organization(github_client):
    """Look up the ``GITHUB_ORGANIZATION`` organization.

    Args:
        github_client: Client object exposing ``get_organization(name)``.

    Returns:
        Whatever ``github_client.get_organization`` returns for the
        configured organization name.
    """
    return github_client.get_organization(GITHUB_ORGANIZATION)
54+
55+
56+
def get_repositories(organization):
    """Return the repositories of *organization*.

    Args:
        organization: Object exposing a ``get_repos()`` method.

    Returns:
        The unmodified result of ``organization.get_repos()``.
    """
    return organization.get_repos()
59+
60+
61+
def get_repo_github_data(repo):
    """Collect the GitHub-provided fields of a repository into a plain dict.

    Args:
        repo: Repository object exposing ``id``, ``name``, ``html_url``,
            ``description``, ``homepage``, ``language``, ``created_at`` and
            ``get_license()`` (presumably a PyGithub repository; verify
            against the caller).

    Returns:
        dict: Keys ``id``, ``name``, ``url``, ``description``, ``website``,
        ``language``, ``created`` (ISO 8601 string) and ``license``, which is
        ``None`` or a dict with ``name`` and ``url``.
    """
    repo_github_data = {
        'id': repo.id,
        'name': repo.name,
        'url': repo.html_url,
        'description': repo.description,
        'website': repo.homepage,
        'language': repo.language,
        'created': repo.created_at.isoformat(),
    }
    try:
        # Named repo_license so the builtin `license` is not shadowed.
        repo_license = repo.get_license()
    except UnknownObjectException:
        # The lookup failed with "unknown object": treat as "no license".
        repo_license = None
    repo_github_data['license'] = (
        {
            'name': repo_license.license.name,
            'url': repo_license.html_url,
        }
        if repo_license
        else None
    )
    return repo_github_data
83+
84+
85+
def get_repo_cc_metadata(repo):
    """Read and normalize a repository's ``.cc-metadata.yml`` file.

    Args:
        repo: Repository object exposing ``get_contents(path)``; the returned
            object must expose ``decoded_content``.

    Returns:
        dict: The parsed metadata, with a comma-separated ``technologies``
        string split into a list of stripped names. An empty dict is
        returned when the file is missing, empty, or not a YAML mapping.
    """
    try:
        cc_metadata_file = repo.get_contents(CC_METADATA_FILE_NAME)
    except UnknownObjectException:
        # The repository has no metadata file.
        return {}
    # NOTE(review): this used to be a bare yaml.load(), which can construct
    # arbitrary Python objects from repository-supplied content and requires
    # an explicit Loader on newer PyYAML releases. safe_load only builds
    # plain data types, which is all this file is expected to contain.
    cc_metadata = yaml.safe_load(cc_metadata_file.decoded_content)
    if not isinstance(cc_metadata, dict):
        # An empty file parses to None and a scalar/list document is not
        # usable as metadata; callers expect a mapping either way.
        return {}
    technologies = cc_metadata.get('technologies')
    if isinstance(technologies, str):
        cc_metadata['technologies'] = [
            technology.strip() for technology in technologies.split(',')
        ]
    # A value that is already a YAML list (or anything else) is kept as-is
    # rather than crashing on .split().
    return cc_metadata
94+
95+
96+
def get_repo_data_list(repos):
    """Build the sorted list of project entries for the Projects page.

    Private repositories are skipped, as are repositories whose CC metadata
    sets ``engineering_project`` to a falsy value (it defaults to ``True``
    when absent).

    Args:
        repos: Iterable of repository objects that also exposes
            ``totalCount``; each repository must expose ``name`` and
            ``private``.

    Returns:
        list[dict]: One dict per included repository (GitHub data overlaid
        with CC metadata), sorted case-insensitively by ``name``.
    """
    repo_data_list = []
    total = repos.totalCount

    for count, repo in enumerate(repos, start=1):
        # The original message ran the total and the name together
        # ("... of 12reponame"); a separator keeps the log readable.
        print(f'Processing {count} of {total} - {repo.name}')
        if repo.private:
            continue
        repo_cc_metadata = get_repo_cc_metadata(repo)
        if not repo_cc_metadata.get('engineering_project', True):
            print('\tNot an active engineering project, skipping')
            continue
        repo_github_data = get_repo_github_data(repo)
        # CC metadata comes last so its keys override the GitHub values.
        repo_data_list.append({**repo_github_data, **repo_cc_metadata})
    return sorted(repo_data_list, key=lambda k: k['name'].lower())
114+
115+
116+
def get_repo_data_dict(repo_data_list):
    """Wrap the repository list in a dict under the ``repos`` key.

    Lektor needs a top-level object (not an array) in the JSON file.
    """
    wrapped = {}
    wrapped['repos'] = repo_data_list
    return wrapped
121+
122+
123+
def generate_json_file(repo_data_dict):
    """Serialize *repo_data_dict* to ``repos.json`` in the databags folder.

    The JSON is written with sorted keys and a four-space indent.

    Args:
        repo_data_dict: JSON-serializable object to write.

    Returns:
        str: Path of the file that was written.
    """
    target_path = f'{JSON_FILE_DIRECTORY}/repos.json'
    with open(target_path, 'w') as output:
        json.dump(
            repo_data_dict,
            output,
            sort_keys=True,
            indent=4,
        )
    return target_path
128+
129+
130+
def commit_and_push_changes(json_filename):
    """Commit and push the generated JSON file if it changed.

    Args:
        json_filename: Path of the generated JSON file inside the clone.

    Nothing is committed or pushed when Git reports no change for the file.
    """
    # Leaving this in here because it's useful to test on separate branches sometimes.
    # now = datetime.datetime.now().isoformat()
    # for char in ['-', ':', '.', 'T']:
    #     now = now.replace(char, '_')
    # branch_name = f'{now}_sync'
    # git.exec_command('checkout', f'-b{branch_name}', cwd=GIT_WORKING_DIRECTORY)

    # `git status --porcelain` also lists untracked files, whereas the plain
    # `git diff` used before is empty for a brand-new repos.json, so a
    # first-time file was never committed.
    # NOTE(review): assumes exec_command returns a (stdout, stderr) pair of
    # bytes, as the original comparison against (b'', b'') implies.
    status_output, _ = git.exec_command(
        'status', '--porcelain', '--', json_filename,
        cwd=GIT_WORKING_DIRECTORY,
    )
    if not status_output.strip():
        return
    git.exec_command('add', json_filename, cwd=GIT_WORKING_DIRECTORY)
    # Pass the flag and the message as separate arguments instead of one
    # fused '-m <message>' string.
    git.exec_command(
        'commit', '-m', 'Syncing new repository changes.',
        cwd=GIT_WORKING_DIRECTORY,
    )
    git.exec_command(
        'push', GITHUB_REPO_URL_WITH_CREDENTIALS,
        cwd=GIT_WORKING_DIRECTORY,
    )
142+
143+
144+
def lambda_handler(*args, **kwargs):
    """Lambda entry point: regenerate the projects JSON and push it.

    The arguments passed to the handler are accepted but not used. The steps
    run in order: refresh the local clone, set the Git identity, gather data
    for the organization's repositories, write the JSON file, then commit
    and push it.
    """
    set_up_repo()
    set_up_git_user()

    organization = get_cc_organization(set_up_github_client())
    project_entries = get_repo_data_list(get_repositories(organization))

    json_path = generate_json_file(get_repo_data_dict(project_entries))
    commit_and_push_changes(json_path)

0 commit comments

Comments
 (0)