|
14 | 14 | "name": "Move data cleaning pipeline from API to Catalog"
|
15 | 15 | },
|
16 | 16 | {
|
17 |
| - "description": "Manage Catalog deployment and provisioning entirely through infrastructure as code.", |
18 |
| - "gid": "1167425798148811", |
19 |
| - "name": "Improve Catalog Deployment and Provisioning" |
20 |
| - }, |
21 |
| - { |
22 |
| - "description": "Create better documentation for community contributors by consolidating internal and public documentation and making it available for everyone.", |
23 |
| - "gid": "1167425798148815", |
24 |
| - "name": "Improve Documentation for Community Contributors" |
| 17 | + "description": "Update Catalog schema to include new metadata generated through AWS Rekognition.", |
| 18 | + "gid": "1154270978154717", |
| 19 | + "name": "Implement architecture for schema for new metadata [AWS Grant]" |
25 | 20 | },
|
26 | 21 | {
|
27 | 22 | "description": "Plan out search algorithm changes to incorporate image metadata generated via AWS Rekognition.",
|
28 | 23 | "gid": "1154270978154720",
|
29 | 24 | "name": "Plan search algorithm changes for new metadata [AWS Grant]"
|
30 | 25 | },
|
31 |
| - { |
32 |
| - "description": "Update Catalog schema to include new metadata generated through AWS Rekognition.", |
33 |
| - "gid": "1154270978154717", |
34 |
| - "name": "Implement architecture for schema for new metadata [AWS Grant]" |
35 |
| - }, |
36 | 26 | {
|
37 | 27 | "description": "Improve how and where we explain licenses, and consider ways to make it easier for reusers to understand and comply with license requirements.",
|
38 | 28 | "gid": "1147666754358269",
|
39 | 29 | "name": "License Explanation/Compliance Improvements"
|
40 | 30 | },
|
41 | 31 | {
|
42 |
| - "description": "Improve the support pages on CC Search, which includes the Collections page, for a better experience. Add explanation text for collections, improve flow.", |
43 |
| - "gid": "1149385618454685", |
44 |
| - "name": "Improved Support Pages" |
| 32 | + "description": "Offline Old Search (oldsearch.creativecommons.org) and redirect traffic to CC Search. Prior to this, build in messaging on Old Search, and support similar functionality on CC Search. See \"Meta Search Integration\" for related work.", |
| 33 | + "gid": "1149456632174214", |
| 34 | + "name": "Offline old CC Search" |
45 | 35 | },
|
46 | 36 | {
|
47 |
| - "description": "Integrating meta search functionality into CC Search for sources that are not currently indexed, and content types we do not currently support.", |
48 |
| - "gid": "1174575887784290", |
49 |
| - "name": "Design Sprint: Meta Search Integration" |
| 37 | + "description": "Research and test potential integrations for Web Monetization into CC Search and other CC web properties.", |
| 38 | + "gid": "1153114910798067", |
| 39 | + "name": "Web Monetization: Phase 1" |
50 | 40 | },
|
51 | 41 | {
|
52 |
| - "description": "Offline Old Search (oldsearch.creativecommons.org) and redirect traffic to CC Search. Prior to this, build in messaging on Old Search, and support similar functionality on CC Search. See \"Meta Search Integration\" for related work.", |
53 |
| - "gid": "1149456632174214", |
54 |
| - "name": "Offline old CC Search" |
| 42 | + "description": "Improve the support pages on CC Search, which includes the Collections page, for a better experience. Add explanation text for collections, improve flow.", |
| 43 | + "gid": "1149385618454685", |
| 44 | + "name": "Improved Support Pages" |
55 | 45 | },
|
56 | 46 | {
|
57 | 47 | "description": "Make accessibility improvements to the UI.",
|
|
63 | 53 | "gid": "1149456632174198",
|
64 | 54 | "name": "Internationalization Infrastructure"
|
65 | 55 | },
|
| 56 | + { |
| 57 | + "description": "Update our Common Crawl provider infrastructure to:\n(1) use Apache Airflow instead of AWS tools like Data Pipeline & Glue for processing data\n(2) unify provider processing to use the same base classes as API providers", |
| 58 | + "gid": "1167425798148813", |
| 59 | + "name": "Improve Common Crawl Infrastructure" |
| 60 | + }, |
66 | 61 | {
|
67 | 62 | "description": "Designing and prototyping an upcoming user interface for searching for audio on CC Search.",
|
68 | 63 | "gid": "1163392248010945",
|
|
73 | 68 | "gid": "1171015130050099",
|
74 | 69 | "name": "Audio Support and Integration"
|
75 | 70 | },
|
76 |
| - { |
77 |
| - "description": "Update our Common Crawl provider infrastructure to:\n(1) use Apache Airflow instead of AWS tools like Data Pipeline & Glue for processing data\n(2) unify provider processing to use the same base classes as API providers", |
78 |
| - "gid": "1167425798148813", |
79 |
| - "name": "Improve Common Crawl Infrastructure" |
80 |
| - }, |
81 |
| - { |
82 |
| - "description": "Switch our Catalog data ingestion for Wikimedia Commons to use the data dumps provided by Wikimedia instead of the MediaWiki API.", |
83 |
| - "gid": "1167425798148807", |
84 |
| - "name": "Use Data Dumps for Wikimedia Ingestion" |
85 |
| - }, |
86 |
| - { |
87 |
| - "description": "Research and test potential integrations for Web Monetization into CC Search and other CC web properties.", |
88 |
| - "gid": "1153114910798067", |
89 |
| - "name": "Web Monetization: Phase 1" |
90 |
| - }, |
91 | 71 | {
|
92 | 72 | "description": "Store a private copy of all the images in the CC Catalog to analyze via machine learning.",
|
93 | 73 | "gid": "1154270978154722",
|
94 |
| - "name": "Scraping & Resizing Work [AWS Grant]" |
| 74 | + "name": "Scraping & Resizing Work for Rekognition [AWS Grant]" |
95 | 75 | },
|
96 | 76 | {
|
97 |
| - "description": "Collect and use structured data from Wikidata to enhance our search algorithm with semantic search.", |
98 |
| - "gid": "1167425798148823", |
99 |
| - "name": "Wikidata integration with Catalog & Search Algorithm" |
100 |
| - }, |
101 |
| - { |
102 |
| - "description": "Build an analytics UI that is fed by Google Analytics and our internal analytics database.", |
103 |
| - "gid": "1149385618454692", |
104 |
| - "name": "Usage/Reuse Metrics Dashboard" |
| 77 | + "description": "Generate metadata via machine learning (using AWS Rekognition) on a set of ~100 million high quality images from the CC Catalog.", |
| 78 | + "gid": "1154270978154727", |
| 79 | + "name": "Run Rekognition on 100m images [AWS Grant]" |
105 | 80 | },
|
106 | 81 | {
|
107 | 82 | "description": "For all possible providers, use their APIs to ingest data into the CC Catalog instead of scraping websites via Common Crawl data.",
|
108 | 83 | "gid": "1149385618454708",
|
109 | 84 | "name": "Switch from Common Crawl to API"
|
110 |
| - }, |
| 85 | + } |
| 86 | + ] |
| 87 | + }, |
| 88 | + { |
| 89 | + "name": "Q4 2020", |
| 90 | + "tasks": [ |
111 | 91 | {
|
112 |
| - "description": "Generate metadata via machine learning (using AWS Rekognition) on a set of ~100 million high quality images from the CC Catalog.", |
113 |
| - "gid": "1154270978154727", |
114 |
| - "name": "Run Rekognition on 100m images [AWS Grant]" |
| 92 | + "description": null, |
| 93 | + "gid": "1186693612765822", |
| 94 | + "name": "Search Relevance Improvements: Language Analysis, Quality Metrics, Minimums" |
115 | 95 | },
|
116 | 96 | {
|
117 |
| - "description": "Upgrade the CC Catalog database to use a schema-less database instead of the relational database (Postgres) that we currently use.", |
118 |
| - "gid": "1167425798148817", |
119 |
| - "name": "Upgrade Catalog: Data Lake" |
| 97 | + "description": "Design updates to the CC Search UI in response to new metadata available as a result of applying machine learning to selected images in the Catalog. At a minimum, we expect new filters will be an option. Integration of design will take place subsequently.", |
| 98 | + "gid": "1154270978154729", |
| 99 | + "name": "Plan UI Updates in Response to Metadata [AWS Grant]" |
120 | 100 | },
|
121 | 101 | {
|
122 | 102 | "description": "Automate the process of finding new providers of CC-licensed content to index into the CC Catalog.",
|
123 | 103 | "gid": "1167425798148819",
|
124 | 104 | "name": "Provider Review Automation"
|
125 | 105 | },
|
126 | 106 | {
|
127 |
| - "description": "Implement changes to CC Search (frontend) and Catalog to make use of thumbnails, as they become available.", |
128 |
| - "gid": "1154270978154725", |
129 |
| - "name": "Implement Use of Thumbnails in Search & Catalog [AWS Grant]" |
| 107 | + "description": "Build an analytics UI that is fed by Google Analytics and our internal analytics database.", |
| 108 | + "gid": "1149385618454692", |
| 109 | + "name": "Usage/Reuse Metrics Dashboard" |
130 | 110 | },
|
131 | 111 | {
|
132 |
| - "description": "Prepare partnership guidelines for CC Search. Create a page on CC Search publishing these guidelines.", |
133 |
| - "gid": "1146971105237802", |
134 |
| - "name": "Partnership guidelines for all integration types" |
| 112 | + "description": "Once the Rekognition crawl finishes, we want to crawl the rest of the catalog (but not feed them to rekognition). This will give us useful metadata like dimensions and quality.", |
| 113 | + "gid": "1186693612765814", |
| 114 | + "name": "Scrape all images and set up feed for new ones" |
135 | 115 | },
|
136 | 116 | {
|
137 |
| - "description": "Design updates to the CC Search UI in response to new metadata available as a result of applying machine learning to selected images in the Catalog. At a minimum, we expect new filters will be an option. Integration of design will take place subsequently.", |
138 |
| - "gid": "1154270978154729", |
139 |
| - "name": "Plan UI Updates in Response to Metadata [AWS Grant]" |
140 |
| - } |
141 |
| - ] |
142 |
| - }, |
143 |
| - { |
144 |
| - "name": "Q4 2020", |
145 |
| - "tasks": [ |
| 117 | + "description": "Create better documentation for community contributors by consolidating internal and public documentation and making it available for everyone.", |
| 118 | + "gid": "1167425798148815", |
| 119 | + "name": "Improve Documentation for Community Contributors" |
| 120 | + }, |
| 121 | + { |
| 122 | + "description": "Manage Catalog deployment and provisioning entirely through infrastructure as code.", |
| 123 | + "gid": "1167425798148811", |
| 124 | + "name": "Improve Catalog Deployment and Provisioning" |
| 125 | + }, |
146 | 126 | {
|
147 | 127 | "description": "Make CC Catalog API documentation more accessible to CC Search users, and improve user experience.",
|
148 | 128 | "gid": "1164969092703369",
|
|
0 commit comments