11use crate :: parser:: Extractor ;
2+ use fxhash:: FxHashSet ;
23use rayon:: prelude:: * ;
34use std:: path:: PathBuf ;
45use tracing:: event;
@@ -11,14 +12,7 @@ pub mod parser;
1112pub mod utility;
1213pub mod variant;
1314
14- #[ derive( Debug , Clone ) ]
15- pub struct ChangedContent {
16- pub file : Option < PathBuf > ,
17- pub content : Option < String > ,
18- pub extension : String ,
19- }
20-
21- pub fn parse_candidate_strings_from_files ( changed_content : Vec < ChangedContent > ) -> Vec < String > {
15+ fn init_tracing ( ) {
2216 if matches ! ( std:: env:: var( "DEBUG" ) , Ok ( value) if value. eq( "*" ) || value. eq( "1" ) || value. eq( "true" ) || value. contains( "tailwind" ) )
2317 {
2418 tracing_subscriber:: fmt ( )
@@ -27,10 +21,63 @@ pub fn parse_candidate_strings_from_files(changed_content: Vec<ChangedContent>)
2721 . compact ( )
2822 . init ( ) ;
2923 }
24+ }
25+
/// One unit of input to scan for candidate strings.
///
/// The readers in this module treat an entry as either a file on disk (`file` set, `content`
/// unset) or inline text (`content` set, `file` unset); any other combination yields an empty
/// blob.
#[derive(Debug, Clone)]
pub struct ChangedContent {
    /// Path of a file whose bytes should be read from disk.
    pub file: Option<PathBuf>,
    /// Inline content to use instead of reading a file.
    pub content: Option<String>,
    /// Extension associated with this entry.
    // NOTE(review): not consumed by the readers visible in this file — confirm where it is used.
    pub extension: String,
}
32+
/// I/O strategy, encoded in the two lowest bits of an options byte.
#[derive(Debug)]
pub enum IO {
    /// Selected by the bit pattern `0b0001`.
    Sequential = 0b0001,
    /// Selected by the bit pattern `0b0010`.
    Parallel = 0b0010,
}

impl From<u8> for IO {
    /// Decodes the `IO` strategy from an options byte.
    ///
    /// Only the two lowest bits are inspected; all higher bits are ignored.
    ///
    /// # Panics
    ///
    /// Panics when the two lowest bits are both clear or both set, because neither pattern
    /// maps to a variant.
    fn from(item: u8) -> Self {
        const MASK: u8 = 0b0011;
        const SEQUENTIAL: u8 = IO::Sequential as u8;
        const PARALLEL: u8 = IO::Parallel as u8;

        match item & MASK {
            SEQUENTIAL => IO::Sequential,
            PARALLEL => IO::Parallel,
            _ => unimplemented!("Unknown 'IO' strategy"),
        }
    }
}
48+
/// Parsing strategy, encoded in bits 2 and 3 of an options byte.
#[derive(Debug)]
pub enum Parsing {
    /// Selected by the bit pattern `0b0100`.
    Sequential = 0b0100,
    /// Selected by the bit pattern `0b1000`.
    Parallel = 0b1000,
}

impl From<u8> for Parsing {
    /// Decodes the `Parsing` strategy from an options byte.
    ///
    /// Only bits 2 and 3 are inspected; all other bits are ignored.
    ///
    /// # Panics
    ///
    /// Panics when bits 2 and 3 are both clear or both set, because neither pattern maps to
    /// a variant.
    fn from(item: u8) -> Self {
        const MASK: u8 = 0b1100;
        const SEQUENTIAL: u8 = Parsing::Sequential as u8;
        const PARALLEL: u8 = Parsing::Parallel as u8;

        match item & MASK {
            SEQUENTIAL => Parsing::Sequential,
            PARALLEL => Parsing::Parallel,
            _ => unimplemented!("Unknown 'Parsing' strategy"),
        }
    }
}
64+
65+ pub fn parse_candidate_strings_from_files ( changed_content : Vec < ChangedContent > ) -> Vec < String > {
66+ init_tracing ( ) ;
3167 parse_all_blobs ( read_all_files ( changed_content) )
3268}
3369
70+ pub fn parse_candidate_strings ( input : Vec < ChangedContent > , options : u8 ) -> Vec < String > {
71+ init_tracing ( ) ;
72+
73+ match ( IO :: from ( options) , Parsing :: from ( options) ) {
74+ ( IO :: Sequential , Parsing :: Sequential ) => parse_all_blobs_sync ( read_all_files_sync ( input) ) ,
75+ ( IO :: Sequential , Parsing :: Parallel ) => parse_all_blobs_sync ( read_all_files ( input) ) ,
76+ ( IO :: Parallel , Parsing :: Sequential ) => parse_all_blobs ( read_all_files_sync ( input) ) ,
77+ ( IO :: Parallel , Parsing :: Parallel ) => parse_all_blobs ( read_all_files ( input) ) ,
78+ }
79+ }
80+
3481#[ tracing:: instrument( skip( changed_content) ) ]
3582fn read_all_files ( changed_content : Vec < ChangedContent > ) -> Vec < Vec < u8 > > {
3683 event ! (
@@ -49,6 +96,24 @@ fn read_all_files(changed_content: Vec<ChangedContent>) -> Vec<Vec<u8>> {
4996 . collect ( )
5097}
5198
99+ #[ tracing:: instrument( skip( changed_content) ) ]
100+ fn read_all_files_sync ( changed_content : Vec < ChangedContent > ) -> Vec < Vec < u8 > > {
101+ event ! (
102+ tracing:: Level :: INFO ,
103+ "Reading {:?} file(s)" ,
104+ changed_content. len( )
105+ ) ;
106+
107+ changed_content
108+ . into_iter ( )
109+ . map ( |c| match ( c. file , c. content ) {
110+ ( Some ( file) , None ) => std:: fs:: read ( file) . unwrap ( ) ,
111+ ( None , Some ( content) ) => content. into_bytes ( ) ,
112+ _ => Default :: default ( ) ,
113+ } )
114+ . collect ( )
115+ }
116+
52117#[ tracing:: instrument( skip( blobs) ) ]
53118fn parse_all_blobs ( blobs : Vec < Vec < u8 > > ) -> Vec < String > {
54119 let input: Vec < _ > = blobs. iter ( ) . map ( |blob| & blob[ ..] ) . collect ( ) ;
@@ -72,3 +137,27 @@ fn parse_all_blobs(blobs: Vec<Vec<u8>>) -> Vec<String> {
72137 result. sort ( ) ;
73138 result
74139}
140+
141+ #[ tracing:: instrument( skip( blobs) ) ]
142+ fn parse_all_blobs_sync ( blobs : Vec < Vec < u8 > > ) -> Vec < String > {
143+ let input: Vec < _ > = blobs. iter ( ) . map ( |blob| & blob[ ..] ) . collect ( ) ;
144+ let input = & input[ ..] ;
145+
146+ let mut result: Vec < String > = input
147+ . iter ( )
148+ . map ( |input| Extractor :: unique ( input, Default :: default ( ) ) )
149+ . fold ( FxHashSet :: default ( ) , |mut a, b| {
150+ a. extend ( b) ;
151+ a
152+ } )
153+ . into_iter ( )
154+ . map ( |s| {
155+ // SAFETY: When we parsed the candidates, we already guaranteed that the byte slices
156+ // are valid, therefore we don't have to re-check here when we want to convert it back
157+ // to a string.
158+ unsafe { String :: from_utf8_unchecked ( s. to_vec ( ) ) }
159+ } )
160+ . collect ( ) ;
161+ result. sort ( ) ;
162+ result
163+ }
0 commit comments