3030
3131# Constants
3232QUARTER = os .path .basename (PATHS ["data_quarter" ])
33+ SECTION = "Google Custom Search (GCS)"
3334
3435
3536def parse_arguments ():
@@ -61,34 +62,73 @@ def parse_arguments():
6162 args = parser .parse_args ()
6263 if not args .enable_save and args .enable_git :
6364 parser .error ("--enable-git requires --enable-save" )
65+ args .logger = LOGGER
66+ args .paths = PATHS
6467 return args
6568
6669
def gcs_intro(args):
    """
    Write Google Custom Search (GCS) introduction.
    """
    # Log this function's own docstring; the previous code logged the
    # docstring of plot_totals_by_product, which mislabeled this step.
    LOGGER.info(gcs_intro.__doc__.strip())

    # The overview total is the sum of the "Count" column of the processed
    # totals-by-product CSV.
    file_path = shared.path_join(
        PATHS["data_2-process"], "gcs_totals_by_product.csv"
    )
    # Log the path with the repository prefix replaced by "."
    LOGGER.info(f"data file: {file_path.replace(PATHS['repo'], '.')}")
    data = pd.read_csv(file_path)

    # Positional arguments: args, section title, entry title, then two None
    # values where the plot functions pass an image path and an image
    # caption, followed by the entry text itself.
    shared.update_readme(
        args,
        SECTION,
        "Overview",
        None,
        None,
        "Google Custom Search (GCS) data uses the `totalResults` returned by"
        " API for search queries of the legal tool URLs (quoted and using"
        " `linkSite` for accuracy), countries codes, and language codes.\n"
        "\n"
        f"**The results show there are a total of {data['Count'].sum():,d}"
        " online documents in the commons--documents that are licensed or put"
        " in the public domain using a Creative Commons (CC) legal tool.**\n"
        "\n"
        "Thank you Google for providing the Programmable Search Engine: Custom"
        " Search JSON API!\n",
    )
97+
98+
6799def plot_top_25_tools (args ):
68100 """
69101 Create a bar chart for the top 25 legal tools
70102 """
103+ LOGGER .info (plot_totals_by_product .__doc__ .strip ())
71104 file_path = shared .path_join (
72105 PATHS ["data_2-process" ], "gcs_top_25_tools.csv"
73106 )
74- LOGGER .info ("Create a bar chart for the top 25 legal tools" )
75107 LOGGER .info (f"data file: { file_path .replace (PATHS ['repo' ], '.' )} " )
76108 data = pd .read_csv (file_path )
77109
78110 plt .figure (figsize = (10 , 10 ))
79- ax = sns .barplot (data , x = "Count" , y = "CC legal tool" )
111+ y_column = "CC legal tool"
112+ ax = sns .barplot (
113+ data ,
114+ x = "Count" ,
115+ y = y_column ,
116+ hue = y_column ,
117+ palette = "pastel" ,
118+ legend = False ,
119+ )
80120 for index , row in data .iterrows ():
81121 ax .annotate (
82122 f"{ row ['Count' ]:,d} " ,
83- (4 , index ),
123+ (4 + 80 , index ),
84124 xycoords = ("axes points" , "data" ),
85- color = "white " ,
86- fontsize = "x- small" ,
87- horizontalalignment = "left " ,
125+ color = "black " ,
126+ fontsize = "small" ,
127+ horizontalalignment = "right " ,
88128 verticalalignment = "center" ,
89129 )
90130 plt .title (f"Top 25 legal tools ({ args .quarter } )" )
91- plt .xlabel ("Number of references " )
131+ plt .xlabel ("Number of works " )
92132 plt .ylabel ("Creative Commons (CC) legal tool" )
93133
94134 # Use the millions formatter for x-axis
@@ -113,13 +153,11 @@ def millions_formatter(x, pos):
113153 plt .savefig (image_path )
114154
115155 shared .update_readme (
116- PATHS ,
117- image_path ,
118- "Google Custom Search" ,
119- "Bar chart showing the top 25 legal tools based on the count of"
120- " search results for each legal tool's URL." ,
121- "Top 25 legal tools" ,
122156 args ,
157+ SECTION ,
158+ "Top 25 legal tools" ,
159+ image_path ,
160+ "Bar chart showing the top 25 individual legal tools." ,
123161 )
124162
125163 LOGGER .info ("Visualization by license type created." )
@@ -129,10 +167,10 @@ def plot_totals_by_product(args):
129167 """
130168 Create a bar chart of the totals by product
131169 """
170+ LOGGER .info (plot_totals_by_product .__doc__ .strip ())
132171 file_path = shared .path_join (
133172 PATHS ["data_2-process" ], "gcs_totals_by_product.csv"
134173 )
135- LOGGER .info (__doc__ )
136174 LOGGER .info (f"data file: { file_path .replace (PATHS ['repo' ], '.' )} " )
137175 data = pd .read_csv (file_path )
138176
@@ -152,14 +190,14 @@ def plot_totals_by_product(args):
152190 (0 + 80 , index ),
153191 xycoords = ("axes points" , "data" ),
154192 color = "black" ,
155- fontsize = "x- small" ,
193+ fontsize = "small" ,
156194 horizontalalignment = "right" ,
157195 verticalalignment = "center" ,
158196 )
159197 plt .title (f"Totals by product ({ args .quarter } )" )
160198 plt .ylabel ("Creative Commons (CC) legal tool product" )
161199 plt .xscale ("log" )
162- plt .xlabel ("Number of references " )
200+ plt .xlabel ("Number of works " )
163201
164202 # Use the millions formatter for x-axis
165203 def millions_formatter (x , pos ):
@@ -185,15 +223,12 @@ def millions_formatter(x, pos):
185223 plt .savefig (image_path )
186224
187225 shared .update_readme (
188- PATHS ,
226+ args ,
227+ SECTION ,
228+ "Totals by product" ,
189229 image_path ,
190- "Google Custom Search" ,
191230 "Bar chart showing how many documents there are for each Creative"
192- " Commons (CC) legal tool. **There are a total of"
193- f" { data ['Count' ].sum ():,d} documents that are either CC licensed"
194- " or put in the public domain using a CC legal tool.**" ,
195- "Totals by product" ,
196- args ,
231+ " Commons (CC) legal tool product." ,
197232 )
198233
199234 LOGGER .info ("Visualization by license type created." )
@@ -234,7 +269,7 @@ def millions_formatter(x, pos):
234269# plt.xticks(rotation=45)
235270#
236271# # Add value numbers to the top of each bar
237- # for p in ax.patches :
272+ # for p in ax.patches :
238273# ax.annotate(
239274# format(p.get_height(), ",.0f"),
240275# (p.get_x() + p.get_width() / 2.0, p.get_height()),
@@ -265,12 +300,11 @@ def millions_formatter(x, pos):
265300# plt.show()
266301#
267302# shared.update_readme(
268- # PATHS,
303+ # args,
304+ # SECTION,
305+ # "Country Report",
269306# image_path,
270- # "Google Custom Search",
271307# "Number of Google Webpages Licensed by Country",
272- # "Country Report",
273- # args,
274308# )
275309#
276310# LOGGER.info("Visualization by country created.")
@@ -343,25 +377,24 @@ def millions_formatter(x, pos):
343377# plt.show()
344378#
345379# shared.update_readme(
346- # PATHS,
380+ # args,
381+ # SECTION,
382+ # "Language Report",
347383# image_path,
348- # "Google Custom Search",
349384# "Number of Google Webpages Licensed by Language",
350- # "Language Report",
351- # args,
352385# )
353386#
354387# LOGGER.info("Visualization by language created.")
355388
356389
357390def main ():
358391 args = parse_arguments ()
359- args .logger = LOGGER
360392 shared .log_paths (LOGGER , PATHS )
361393 shared .git_fetch_and_merge (args , PATHS ["repo" ])
362394
363- plot_top_25_tools (args )
395+ gcs_intro (args )
364396 plot_totals_by_product (args )
397+ plot_top_25_tools (args )
365398 # plot_by_country(data, args)
366399 # plot_by_language(data, args)
367400
0 commit comments