Skip to content

Commit b19da55

Browse files
authored
Merge pull request #121 from creativecommons/fetch-automation
Merge fetch-automation into gsoc2024-dev-1 branch
2 parents: 9632900 + b338379 | commit b19da55

File tree

10 files changed

+132
-28
lines changed

10 files changed

+132
-28
lines changed

.github/workflows/fetch.yml

+18-3
Original file line number | Diff line number | Diff line change
@@ -2,10 +2,14 @@ name: Fetch Data
22

33
on:
44
push:
5-
branches:
6-
- automation-workflow
75
schedule:
86
- cron: '15 1 1-20 1,4,7,10 *'
7+
- cron: '15 5 1-20 1,4,7,10 *'
8+
- cron: '15 9 1-20 1,4,7,10 *'
9+
- cron: '15 13 1-20 1,4,7,10 *'
10+
- cron: '15 17 1-20 1,4,7,10 *'
11+
- cron: '15 21 1-20 1,4,7,10 *'
12+
- cron: '15 23 1-20 1,4,7,10 *'
913
workflow_dispatch:
1014

1115
jobs:
@@ -39,9 +43,20 @@ jobs:
3943
run: |
4044
python scripts/1-fetch/gcs_fetched.py
4145
42-
- name: Commit and push changes if needed
46+
- name: Fetch and merge changes
47+
env:
48+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
49+
run: |
50+
python scripts/shared.py --operation fetch_and_merge --branch fetch-automation
51+
52+
- name: Add and commit changes
4353
env:
4454
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
4555
run: |
4656
python scripts/shared.py --operation add_and_commit --message "Automated data fetch and commit"
57+
58+
- name: Push changes
59+
env:
60+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
61+
run: |
4762
python scripts/shared.py --operation push

.github/workflows/process.yml

+24-5
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,15 @@
11
name: Process Data
22

33
on:
4-
# push:
5-
# branches:
6-
# - automation-workflow
4+
# push:
75
schedule:
8-
- cron: '15 1 1-20 2,5,8,11 *'
6+
- cron: '15 1 1-20 1,4,7,10 *'
7+
- cron: '15 5 1-20 1,4,7,10 *'
8+
- cron: '15 9 1-20 1,4,7,10 *'
9+
- cron: '15 13 1-20 1,4,7,10 *'
10+
- cron: '15 17 1-20 1,4,7,10 *'
11+
- cron: '15 21 1-20 1,4,7,10 *'
12+
- cron: '15 23 1-20 1,4,7,10 *'
913
workflow_dispatch:
1014

1115
jobs:
@@ -26,14 +30,29 @@ jobs:
2630
pip install --upgrade pip
2731
pip install pipenv
2832
pipenv sync --system
33+
pip install -r requirements.txt
34+
35+
- name: Set PYTHONPATH
36+
run: echo "PYTHONPATH=./scripts" >> $GITHUB_ENV
2937

3038
- name: Run process script
3139
run: |
3240
python scripts/2-process/gcs_processed.py
3341
34-
- name: Commit and push changes if needed
42+
- name: Fetch and merge changes
43+
env:
44+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
45+
run: |
46+
python scripts/shared.py --operation fetch_and_merge --branch fetch-automation
47+
48+
- name: Add and commit changes
3549
env:
3650
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
3751
run: |
3852
python scripts/shared.py --operation add_and_commit --message "Automated data processing and commit"
53+
54+
- name: Push changes
55+
env:
56+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
57+
run: |
3958
python scripts/shared.py --operation push

.github/workflows/report.yml

+25-6
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,15 @@
11
name: Generate Report
22

33
on:
4-
# push:
5-
# branches:
6-
# - automation-workflow
4+
push:
75
schedule:
8-
- cron: '15 1 1-20 3,6,9,12 *'
6+
- cron: '15 1 1-20 1,4,7,10 *'
7+
- cron: '15 5 1-20 1,4,7,10 *'
8+
- cron: '15 9 1-20 1,4,7,10 *'
9+
- cron: '15 13 1-20 1,4,7,10 *'
10+
- cron: '15 17 1-20 1,4,7,10 *'
11+
- cron: '15 21 1-20 1,4,7,10 *'
12+
- cron: '15 23 1-20 1,4,7,10 *'
913
workflow_dispatch:
1014

1115
jobs:
@@ -26,14 +30,29 @@ jobs:
2630
pip install --upgrade pip
2731
pip install pipenv
2832
pipenv sync --system
33+
pip install -r requirements.txt
34+
35+
- name: Set PYTHONPATH
36+
run: echo "PYTHONPATH=./scripts" >> $GITHUB_ENV
2937

3038
- name: Run report script
3139
run: |
3240
python scripts/3-report/gcs_reports.py
3341
34-
- name: Commit and push changes if needed
42+
- name: Fetch and merge changes
43+
env:
44+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
45+
run: |
46+
python scripts/shared.py --operation fetch_and_merge --branch fetch-automation
47+
48+
- name: Add and commit changes
49+
env:
50+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
51+
run: |
52+
python scripts/shared.py --operation add_and_commit --message "Automated data report and commit"
53+
54+
- name: Push changes
3555
env:
3656
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
3757
run: |
38-
python scripts/shared.py --operation add_and_commit --message "Automated report generation and commit"
3958
python scripts/shared.py --operation push

data/2024Q3/1-fetch/gcs_fetched.csv

+1
Original file line number | Diff line number | Diff line change
@@ -16,3 +16,4 @@ https://creativecommons.org/licenses/by-nc-sa/3.0,35800000,29800000,18000,5760,1
1616
https://creativecommons.org/licenses/sa/1.0,120000000,101000000,23500,16600,167000,56500,24400,116000000,247000,314000,29100,120000000,213000
1717
https://creativecommons.org/licenses/sa/1.0,120000000,101000000,23500,16600,167000,56500,24400,116000000,247000,314000,29100,120000000,213000
1818
https://creativecommons.org/licenses/by-nc-sa/2.0,20400000,13800000,21400,3730,65000,21700,9270,19100000,92600,773000,4470,20400000,5740
19+
https://creativecommons.org/licenses/by-sa/2.0,0,0,0,0,0,0,0,0,0,0,0,0,0
3.84 KB
Loading
-4.95 KB
Loading
2.67 KB
Loading

data/2024Q3/README.md

+12
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,18 @@
44
## Data Source: Google Custom Search
55

66

7+
### Country Report
8+
![Number of Google Webpages Licensed by Country](3-report/gcs_country_report.png)
9+
Number of Google Webpages Licensed by Country
10+
11+
### License Type Report
12+
![Number of Webpages Licensed by License Type](3-report/gcs_licensetype_report.png)
13+
Number of Webpages Licensed by License Type
14+
15+
### Language Report
16+
![Number of Google Webpages Licensed by Language](3-report/gcs_language_report.png)
17+
Number of Google Webpages Licensed by Language
18+
719
### Country Report
820
![Number of Google Webpages Licensed by Country](3-report/gcs_country_report.png)
921
Number of Google Webpages Licensed by Country

data/2024Q3/state.yaml

+1-1
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
total_records_retrieved: 234
1+
total_records_retrieved: 325

scripts/shared.py

+51-13
Original file line number | Diff line number | Diff line change
@@ -52,21 +52,46 @@ def log_paths(logger, paths):
5252
logger.info(f"PATHS:{paths_list}")
5353

5454

55-
def commit_changes(message):
56-
repo_path = os.getcwd()
55+
def fetch_and_merge(repo_path, branch="fetch-automation"):
5756
try:
5857
repo = Repo(repo_path)
58+
origin = repo.remote(name="origin")
59+
origin.fetch()
60+
repo.git.merge(f"origin/{branch}", allow_unrelated_histories=True)
61+
logging.info(f"Fetched and merged latest changes from {branch}")
5962
except InvalidGitRepositoryError:
6063
logging.error(f"Invalid Git repository at {repo_path}")
61-
return
6264
except NoSuchPathError:
6365
logging.error(f"No such path: {repo_path}")
64-
return
66+
except Exception as e:
67+
logging.error(f"Error during fetch and merge: {e}")
6568

66-
repo.git.add(update=True)
67-
repo.index.commit(message)
68-
origin = repo.remote(name="origin")
69-
origin.push()
69+
70+
def add_and_commit(repo_path, message):
71+
try:
72+
repo = Repo(repo_path)
73+
if not repo.is_dirty(untracked_files=True):
74+
logging.info("No changes to commit")
75+
return
76+
repo.git.add(update=True)
77+
repo.index.commit(message)
78+
logging.info("Changes committed")
79+
except InvalidGitRepositoryError:
80+
logging.error(f"Invalid Git repository at {repo_path}")
81+
except NoSuchPathError:
82+
logging.error(f"No such path: {repo_path}")
83+
84+
85+
def push_changes(repo_path):
86+
try:
87+
repo = Repo(repo_path)
88+
origin = repo.remote(name="origin")
89+
origin.push()
90+
logging.info("Changes pushed")
91+
except InvalidGitRepositoryError:
92+
logging.error(f"Invalid Git repository at {repo_path}")
93+
except NoSuchPathError:
94+
logging.error(f"No such path: {repo_path}")
7095

7196

7297
def main():
@@ -75,16 +100,29 @@ def main():
75100
"--operation",
76101
type=str,
77102
required=True,
78-
help="Operation to perform: commit",
103+
help="Operation to perform: fetch_and_merge, add_and_commit, push",
79104
)
105+
parser.add_argument("--message", type=str, help="Commit message")
80106
parser.add_argument(
81-
"--message", type=str, required=True, help="Commit message"
107+
"--branch",
108+
type=str,
109+
default="fetch-automation",
110+
help="Branch to fetch and merge from",
82111
)
83-
84112
args = parser.parse_args()
85113

86-
if args.operation == "commit":
87-
commit_changes(args.message)
114+
repo_path = os.getcwd() # Assuming the script runs in the root of the repo
115+
116+
if args.operation == "fetch_and_merge":
117+
fetch_and_merge(repo_path, args.branch)
118+
elif args.operation == "add_and_commit":
119+
if not args.message:
120+
raise ValueError(
121+
"Commit message is required for add_and_commit operation"
122+
)
123+
add_and_commit(repo_path, args.message)
124+
elif args.operation == "push":
125+
push_changes(repo_path)
88126
else:
89127
raise ValueError("Unsupported operation")
90128

0 commit comments

Comments
 (0)