File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -11,10 +11,11 @@ This is highly inefficient as (a) a gzipped WARC file is composed of multiple in
1111
1212For detailed usage, see the examples folder, but minimally...
1313
14- :::python
15- from gzipstream import GzipStreamFile
16- f = open('huge_file.gz') # Any streaming file object that supports `read`
17- gz = GzipStreamFile(f)
14+ ```python
15+ from gzipstream import GzipStreamFile
16+ f = open('huge_file.gz') # Any streaming file object that supports `read`
17+ gz = GzipStreamFile(f)
18+ ```
1819
1920# License
2021
Original file line number Diff line number Diff line change @@ -19,6 +19,8 @@ def restart_decoder(self):
1919 self .unused_buffer += self .decoder .decompress (unused_raw )
2020
2121 def read (self , size ):
22+ # TODO: Update this to use unconsumed_tail and a StringIO buffer
23+ # http://docs.python.org/2/library/zlib.html#zlib.Decompress.unconsumed_tail
2224 # Check if we need to start a new decoder
2325 if self .decoder and self .decoder .unused_data :
2426 self .restart_decoder ()
@@ -41,6 +43,7 @@ def read(self, size):
4143 return self .read (size )
4244
4345 def readline (self ):
46+ # TODO: This should work in large chunks rather than a byte at a time
4447 chars = []
4548 c = self .read (1 )
4649 while c != '\n ' :
You can’t perform that action at this time.
0 commit comments