30
30
31
31
# Constants
32
32
QUARTER = os .path .basename (PATHS ["data_quarter" ])
33
+ SECTION = "Google Custom Search (GCS)"
33
34
34
35
35
36
def parse_arguments ():
@@ -61,34 +62,73 @@ def parse_arguments():
61
62
args = parser .parse_args ()
62
63
if not args .enable_save and args .enable_git :
63
64
parser .error ("--enable-git requires --enable-save" )
65
+ args .logger = LOGGER
66
+ args .paths = PATHS
64
67
return args
65
68
66
69
70
def gcs_intro(args):
    """
    Write Google Custom Search (GCS) introduction.

    Reads "gcs_totals_by_product.csv" from PATHS["data_2-process"], sums its
    "Count" column, and passes the resulting overview text to
    shared.update_readme() under SECTION with the entry title "Overview".
    """
    # Log this function's own docstring. The previous code logged
    # plot_totals_by_product.__doc__, which mislabeled this step in the log.
    LOGGER.info(gcs_intro.__doc__.strip().splitlines()[0])
    file_path = shared.path_join(
        PATHS["data_2-process"], "gcs_totals_by_product.csv"
    )
    # Show the data file relative to the repository root
    LOGGER.info(f"data file: {file_path.replace(PATHS['repo'], '.')}")
    data = pd.read_csv(file_path)
    # NOTE(review): the two None arguments occupy the positions where the
    # plot functions pass image_path and the image description, so they
    # presumably mean "no image" -- confirm against shared.update_readme().
    shared.update_readme(
        args,
        SECTION,
        "Overview",
        None,
        None,
        "Google Custom Search (GCS) data uses the `totalResults` returned by"
        " API for search queries of the legal tool URLs (quoted and using"
        " `linkSite` for accuracy), countries codes, and language codes.\n"
        "\n"
        f"**The results show there are a total of {data['Count'].sum():,d}"
        " online documents in the commons--documents that are licensed or put"
        " in the public domain using a Creative Commons (CC) legal tool.**\n"
        "\n"
        "Thank you Google for providing the Programable Search Engine: Custom"
        " Search JSON API!\n",
    )
97
+
98
+
67
99
def plot_top_25_tools (args ):
68
100
"""
69
101
Create a bar chart for the top 25 legal tools
70
102
"""
103
+ LOGGER .info (plot_totals_by_product .__doc__ .strip ())
71
104
file_path = shared .path_join (
72
105
PATHS ["data_2-process" ], "gcs_top_25_tools.csv"
73
106
)
74
- LOGGER .info ("Create a bar chart for the top 25 legal tools" )
75
107
LOGGER .info (f"data file: { file_path .replace (PATHS ['repo' ], '.' )} " )
76
108
data = pd .read_csv (file_path )
77
109
78
110
plt .figure (figsize = (10 , 10 ))
79
- ax = sns .barplot (data , x = "Count" , y = "CC legal tool" )
111
+ y_column = "CC legal tool"
112
+ ax = sns .barplot (
113
+ data ,
114
+ x = "Count" ,
115
+ y = y_column ,
116
+ hue = y_column ,
117
+ palette = "pastel" ,
118
+ legend = False ,
119
+ )
80
120
for index , row in data .iterrows ():
81
121
ax .annotate (
82
122
f"{ row ['Count' ]:,d} " ,
83
- (4 , index ),
123
+ (4 + 80 , index ),
84
124
xycoords = ("axes points" , "data" ),
85
- color = "white " ,
86
- fontsize = "x- small" ,
87
- horizontalalignment = "left " ,
125
+ color = "black " ,
126
+ fontsize = "small" ,
127
+ horizontalalignment = "right " ,
88
128
verticalalignment = "center" ,
89
129
)
90
130
plt .title (f"Top 25 legal tools ({ args .quarter } )" )
91
- plt .xlabel ("Number of references " )
131
+ plt .xlabel ("Number of works " )
92
132
plt .ylabel ("Creative Commons (CC) legal tool" )
93
133
94
134
# Use the millions formatter for x-axis
@@ -113,13 +153,11 @@ def millions_formatter(x, pos):
113
153
plt .savefig (image_path )
114
154
115
155
shared .update_readme (
116
- PATHS ,
117
- image_path ,
118
- "Google Custom Search" ,
119
- "Bar chart showing the top 25 legal tools based on the count of"
120
- " search results for each legal tool's URL." ,
121
- "Top 25 legal tools" ,
122
156
args ,
157
+ SECTION ,
158
+ "Top 25 legal tools" ,
159
+ image_path ,
160
+ "Bar chart showing the top 25 individual legal tools." ,
123
161
)
124
162
125
163
LOGGER .info ("Visualization by license type created." )
@@ -129,10 +167,10 @@ def plot_totals_by_product(args):
129
167
"""
130
168
Create a bar chart of the totals by product
131
169
"""
170
+ LOGGER .info (plot_totals_by_product .__doc__ .strip ())
132
171
file_path = shared .path_join (
133
172
PATHS ["data_2-process" ], "gcs_totals_by_product.csv"
134
173
)
135
- LOGGER .info (__doc__ )
136
174
LOGGER .info (f"data file: { file_path .replace (PATHS ['repo' ], '.' )} " )
137
175
data = pd .read_csv (file_path )
138
176
@@ -152,14 +190,14 @@ def plot_totals_by_product(args):
152
190
(0 + 80 , index ),
153
191
xycoords = ("axes points" , "data" ),
154
192
color = "black" ,
155
- fontsize = "x- small" ,
193
+ fontsize = "small" ,
156
194
horizontalalignment = "right" ,
157
195
verticalalignment = "center" ,
158
196
)
159
197
plt .title (f"Totals by product ({ args .quarter } )" )
160
198
plt .ylabel ("Creative Commons (CC) legal tool product" )
161
199
plt .xscale ("log" )
162
- plt .xlabel ("Number of references " )
200
+ plt .xlabel ("Number of works " )
163
201
164
202
# Use the millions formatter for x-axis
165
203
def millions_formatter (x , pos ):
@@ -185,15 +223,12 @@ def millions_formatter(x, pos):
185
223
plt .savefig (image_path )
186
224
187
225
shared .update_readme (
188
- PATHS ,
226
+ args ,
227
+ SECTION ,
228
+ "Totals by product" ,
189
229
image_path ,
190
- "Google Custom Search" ,
191
230
"Bar chart showing how many documents there are for each Creative"
192
- " Commons (CC) legal tool. **There are a total of"
193
- f" { data ['Count' ].sum ():,d} documents that are either CC licensed"
194
- " or put in the public domain using a CC legal tool.**" ,
195
- "Totals by product" ,
196
- args ,
231
+ " Commons (CC) legal tool product." ,
197
232
)
198
233
199
234
LOGGER .info ("Visualization by license type created." )
@@ -234,7 +269,7 @@ def millions_formatter(x, pos):
234
269
# plt.xticks(rotation=45)
235
270
#
236
271
# # Add value numbers to the top of each bar
237
- # for p in ax.patches :
272
+ # for p in ax.patches :
238
273
# ax.annotate(
239
274
# format(p.get_height(), ",.0f"),
240
275
# (p.get_x() + p.get_width() / 2.0, p.get_height()),
@@ -265,12 +300,11 @@ def millions_formatter(x, pos):
265
300
# plt.show()
266
301
#
267
302
# shared.update_readme(
268
- # PATHS,
303
+ # args,
304
+ # SECTION,
305
+ # "Country Report",
269
306
# image_path,
270
- # "Google Custom Search",
271
307
# "Number of Google Webpages Licensed by Country",
272
- # "Country Report",
273
- # args,
274
308
# )
275
309
#
276
310
# LOGGER.info("Visualization by country created.")
@@ -343,25 +377,24 @@ def millions_formatter(x, pos):
343
377
# plt.show()
344
378
#
345
379
# shared.update_readme(
346
- # PATHS,
380
+ # args,
381
+ # SECTION,
382
+ # "Language Report",
347
383
# image_path,
348
- # "Google Custom Search",
349
384
# "Number of Google Webpages Licensed by Language",
350
- # "Language Report",
351
- # args,
352
385
# )
353
386
#
354
387
# LOGGER.info("Visualization by language created.")
355
388
356
389
357
390
def main ():
358
391
args = parse_arguments ()
359
- args .logger = LOGGER
360
392
shared .log_paths (LOGGER , PATHS )
361
393
shared .git_fetch_and_merge (args , PATHS ["repo" ])
362
394
363
- plot_top_25_tools (args )
395
+ gcs_intro (args )
364
396
plot_totals_by_product (args )
397
+ plot_top_25_tools (args )
365
398
# plot_by_country(data, args)
366
399
# plot_by_language(data, args)
367
400
0 commit comments