-
Notifications
You must be signed in to change notification settings - Fork 18
/
Copy pathgenerate_projects_page.py
154 lines (124 loc) · 5.32 KB
/
generate_projects_page.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import datetime
import json
import os
from base64 import b64decode
import boto3
import git
import yaml
import emoji
from github import Github
from github.GithubException import UnknownObjectException
# Identity attached to the commits this script creates.
GIT_USER_NAME = 'CC creativecommons.github.io Bot'
GIT_USER_EMAIL = 'cc-creativecommons-github-io-bot@creativecommons.org'

# GitHub account, organization and repository the script works against.
GITHUB_USERNAME = 'cc-creativecommons-github-io-bot'
GITHUB_ORGANIZATION = 'creativecommons'
GITHUB_REPO_NAME = 'creativecommons.github.io-source'

# The token is read from the environment in base64-encoded, KMS-encrypted form.
ENCRYPTED_GITHUB_TOKEN = os.environ['GITHUB_TOKEN']
# Decryption runs at import time, outside the function handler, so the token
# is decrypted once per container instead of once per invocation.
DECRYPTED_GITHUB_TOKEN = (
    boto3.client('kms')
    .decrypt(CiphertextBlob=b64decode(ENCRYPTED_GITHUB_TOKEN))['Plaintext']
    .decode('ascii')
)
# Remote URL with the bot credentials embedded; it contains the secret token.
GITHUB_REPO_URL_WITH_CREDENTIALS = (
    f'https://{GITHUB_USERNAME}:{DECRYPTED_GITHUB_TOKEN}'
    f'@github.com/{GITHUB_ORGANIZATION}/{GITHUB_REPO_NAME}.git'
)

# Per-repository metadata file and the local paths used for the checkout.
CC_METADATA_FILE_NAME = '.cc-metadata.yml'
LAMBDA_WORKING_DIRECTORY = '/tmp'
GIT_WORKING_DIRECTORY = f'{LAMBDA_WORKING_DIRECTORY}/{GITHUB_REPO_NAME}'
JSON_FILE_DIRECTORY = f'{GIT_WORKING_DIRECTORY}/databags'
def set_up_repo():
    """Ensure an up-to-date clone of the source repository and return its path.

    Returns:
        The path ``{LAMBDA_WORKING_DIRECTORY}/{GITHUB_REPO_NAME}`` as a string.
    """
    # Containers may be reused between invocations, in which case the clone
    # from an earlier run is still on disk and only needs refreshing.
    clone_already_present = os.path.isdir(GIT_WORKING_DIRECTORY)
    if not clone_already_present:
        # NOTE(review): no cwd is passed here, so the clone location depends on
        # git.exec_command's default working directory - confirm it matches
        # LAMBDA_WORKING_DIRECTORY.
        git.exec_command('clone', GITHUB_REPO_URL_WITH_CREDENTIALS)

    git.exec_command(
        'pull',
        GITHUB_REPO_URL_WITH_CREDENTIALS,
        cwd=GIT_WORKING_DIRECTORY,
    )

    repo_path = f'{LAMBDA_WORKING_DIRECTORY}/{GITHUB_REPO_NAME}'
    return repo_path
def set_up_git_user():
    """Publish the bot identity through git's author/committer environment variables."""
    os.environ.update({
        'GIT_AUTHOR_NAME': GIT_USER_NAME,
        'GIT_AUTHOR_EMAIL': GIT_USER_EMAIL,
        'GIT_COMMITTER_NAME': GIT_USER_NAME,
        'GIT_COMMITTER_EMAIL': GIT_USER_EMAIL,
    })
def set_up_github_client():
    """Return a ``Github`` client constructed with the decrypted bot token."""
    return Github(DECRYPTED_GITHUB_TOKEN)
def get_cc_organization(github_client):
    """Look up the ``GITHUB_ORGANIZATION`` organization through *github_client*.

    Args:
        github_client: Client object exposing ``get_organization(name)``.

    Returns:
        Whatever ``github_client.get_organization`` returns for the organization.
    """
    return github_client.get_organization(GITHUB_ORGANIZATION)
def get_repositories(organization):
    """Return the repositories of *organization*.

    Args:
        organization: Object exposing ``get_repos()``.

    Returns:
        The result of ``organization.get_repos()``, passed through unchanged.
    """
    return organization.get_repos()
def get_repo_github_data(repo):
    """Collect the GitHub-side fields for a single repository.

    Args:
        repo: Repository object exposing ``id``, ``name``, ``html_url``,
            ``description``, ``homepage``, ``language``, ``created_at`` and
            ``get_license()``.

    Returns:
        A dict with the keys ``id``, ``name``, ``url``, ``description``,
        ``website``, ``language``, ``created`` (``created_at`` in ISO format)
        and ``license``. ``license`` is a dict with ``name`` and ``url``, or
        ``None`` when the repository has no license.
    """
    description = repo.description
    if description is not None:
        # Only run emojize on real text: a repository without a description
        # reports None, which emojize cannot process.
        description = emoji.emojize(description)

    repo_github_data = {
        'id': repo.id,
        'name': repo.name,
        'url': repo.html_url,
        'description': description,
        'website': repo.homepage,
        'language': repo.language,
        'created': repo.created_at.isoformat(),
    }

    # A missing license surfaces as UnknownObjectException; treat it the same
    # as "no license" rather than failing the whole run.
    try:
        repo_license = repo.get_license()
    except UnknownObjectException:
        repo_license = None

    if repo_license:
        repo_github_data['license'] = {
            'name': repo_license.license.name,
            'url': repo_license.html_url,
        }
    else:
        repo_github_data['license'] = None
    return repo_github_data
def get_repo_cc_metadata(repo):
    """Read a repository's ``.cc-metadata.yml`` file, if it has one.

    Args:
        repo: Repository object exposing ``get_contents(path)``.

    Returns:
        The parsed metadata mapping, or ``{}`` when the file does not exist or
        is empty. A comma-separated ``technologies`` string is converted to a
        list of stripped names; any other ``technologies`` value is left as is.
    """
    try:
        cc_metadata_file = repo.get_contents(CC_METADATA_FILE_NAME)
    except UnknownObjectException:
        return {}

    # SECURITY: this content comes from repository files, so it is parsed with
    # safe_load instead of yaml.load. yaml.load without an explicit Loader can
    # construct arbitrary Python objects and is rejected by current PyYAML.
    # An empty file parses to None, hence the fallback to an empty mapping.
    cc_metadata = yaml.safe_load(cc_metadata_file.decoded_content) or {}

    technologies = cc_metadata.get('technologies')
    if isinstance(technologies, str):
        cc_metadata['technologies'] = [
            technology.strip() for technology in technologies.split(',')
        ]
    return cc_metadata
def get_repo_data_list(repos):
    """Build the sorted list of data dicts for public engineering projects.

    Args:
        repos: Iterable of repository objects that also exposes ``totalCount``.
            Each repository provides ``name`` and ``private``.

    Returns:
        A list of dicts, one per repository that is not private and whose
        metadata does not set ``engineering_project`` to a falsy value, sorted
        case-insensitively by ``name``. Metadata keys override GitHub keys.
    """
    total = repos.totalCount
    collected = []
    for position, repo in enumerate(repos, start=1):
        print(f'Processing {position} of {total} – {repo.name}')
        if repo.private:
            continue

        cc_metadata = get_repo_cc_metadata(repo)
        # Repositories count as engineering projects unless they opt out.
        if not cc_metadata.get('engineering_project', True):
            print('\tNot an active engineering project, skipping')
            continue

        collected.append({**get_repo_github_data(repo), **cc_metadata})

    collected.sort(key=lambda entry: entry['name'].lower())
    return collected
def get_repo_data_dict(repo_data_list):
    """Wrap *repo_data_list* in a dict under the ``repos`` key.

    Lektor needs a top-level object (not an array) in the JSON file.
    """
    wrapped = dict(repos=repo_data_list)
    return wrapped
def generate_json_file(repo_data_dict):
    """Write *repo_data_dict* to ``repos.json`` inside ``JSON_FILE_DIRECTORY``.

    Args:
        repo_data_dict: JSON-serializable object to write.

    Returns:
        The path of the written file.
    """
    output_path = f'{JSON_FILE_DIRECTORY}/repos.json'
    with open(output_path, 'w') as output_file:
        # Sorted keys and a fixed indent keep the output stable between runs.
        json.dump(
            repo_data_dict,
            output_file,
            indent=4,
            sort_keys=True,
        )
    return output_path
def commit_and_push_changes(json_filename):
    """Commit and push *json_filename* when the working tree has changes.

    Args:
        json_filename: Path of the generated JSON file to stage.
    """
    # Kept on purpose: handy for testing on a separate branch.
    # now = datetime.datetime.now().isoformat()
    # for char in ['-', ':', '.', 'T']:
    #     now = now.replace(char, '_')
    # branch_name = f'{now}_sync'
    # git.exec_command('checkout', f'-b{branch_name}', cwd=GIT_WORKING_DIRECTORY)

    diff_output = git.exec_command('diff', cwd=GIT_WORKING_DIRECTORY)
    # A pair of empty byte strings means `git diff` reported nothing to commit.
    if diff_output == (b'', b''):
        return

    git_steps = (
        ('add', f'{json_filename}'),
        ('commit', '-m Syncing new repository changes.'),
        ('push', GITHUB_REPO_URL_WITH_CREDENTIALS),
    )
    for step_arguments in git_steps:
        git.exec_command(*step_arguments, cwd=GIT_WORKING_DIRECTORY)
def lambda_handler(*args, **kwargs):
    """Run the full sync: refresh the clone, regenerate the JSON file, push.

    Any positional and keyword arguments are accepted and ignored.
    """
    # Local git state first, so the checkout and identity are ready before any
    # data is fetched.
    set_up_repo()
    set_up_git_user()

    organization = get_cc_organization(set_up_github_client())
    repo_data_list = get_repo_data_list(get_repositories(organization))
    json_filename = generate_json_file(get_repo_data_dict(repo_data_list))

    commit_and_push_changes(json_filename)