Skip to content

Commit 8bb31a3

Browse files
committed
merge and refine fixes to streaming decodes of strings, mark 0.2.4-alpha2
1 parent 9ed0847 commit 8bb31a3

File tree

3 files changed

+21
-37
lines changed

3 files changed

+21
-37
lines changed

project.clj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
(defproject byte-streams "0.2.4-alpha1"
1+
(defproject byte-streams "0.2.4-alpha2"
22
:description "A simple way to handle the menagerie of Java byte representations."
33
:license {:name "MIT License"
44
:url "http://opensource.org/licenses/MIT"}

src/byte_streams/char_sequence.clj

Lines changed: 15 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -33,22 +33,15 @@
3333
(parse-result (.decode decoder in out false)))
3434

3535
(defn flush
36-
([decoder out] (flush decoder (ByteBuffer/allocate 0) out))
37-
([^CharsetDecoder decoder ^ByteBuffer in ^CharBuffer out]
38-
(and
39-
(parse-result (.decode decoder in out true))
40-
(parse-result (.flush decoder out)))))
41-
42-
(defn has-remaining-bytes? [^ByteBuffer byte-buffer]
43-
{:pre [(some? byte-buffer)]}
44-
(.hasRemaining byte-buffer))
36+
[^CharsetDecoder decoder ^ByteBuffer in ^CharBuffer out]
37+
(parse-result (.decode decoder (or in (ByteBuffer/allocate 0)) out true))
38+
(parse-result (.flush decoder out)))
4539

46-
(defn merge-byte-buffers [^ByteBuffer l ^ByteBuffer r]
47-
{:pre [(some? l) (some? r)]}
48-
(-> (ByteBuffer/allocate (+ (.remaining l) (.remaining r)))
49-
(.put l)
50-
(.put r)
51-
.flip))
40+
(defn concat-bytes [^ByteBuffer a ^ByteBuffer b]
41+
(let [buf (ByteBuffer/allocate (+ (.remaining a) (.remaining b)))]
42+
(.put buf a)
43+
(.put buf b)
44+
(.flip buf)))
5245

5346
(defn lazy-char-buffer-sequence
5447
[^CharsetDecoder decoder
@@ -76,34 +69,28 @@
7669
(lazy-char-buffer-sequence decoder chunk-size extra-bytes close-fn byte-source))
7770

7871
(if-let [in (byte-source chunk-size)]
79-
(let [expanded-in (if (some-> extra-bytes has-remaining-bytes?)
80-
;; in case of underflow we need to pass new buffer
81-
;; containing remaining bytes from the initial input
82-
;; along with some new bytes to the CharsetDecoder
83-
(merge-byte-buffers extra-bytes in)
84-
in)
85-
result (decode decoder expanded-in out)]
72+
(let [in (if (and extra-bytes (.hasRemaining extra-bytes))
73+
(concat-bytes extra-bytes in)
74+
in)
75+
result (decode decoder in out)]
8676
(cons
8777
(.flip out)
8878
(lazy-char-buffer-sequence
8979
decoder
9080
chunk-size
91-
(when (has-remaining-bytes? expanded-in)
92-
expanded-in)
81+
(when (.hasRemaining ^ByteBuffer in) in)
9382
close-fn
9483
byte-source)))
9584
(do
96-
(if (some? extra-bytes)
97-
(flush decoder extra-bytes out)
98-
(flush decoder out))
85+
(flush decoder extra-bytes out)
9986
(when close-fn (close-fn))
10087
(.flip out)))))))
10188

10289
(defn decode-byte-source
10390
[byte-source
10491
close-fn
10592
{:keys [chunk-size encoding on-encoding-error]
106-
:or {chunk-size 4096
93+
:or {chunk-size 1024
10794
on-encoding-error :replace
10895
encoding "UTF-8"}}]
10996
(let [action (coding-error-action on-encoding-error)

test/byte_streams_test.clj

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -133,11 +133,8 @@
133133

134134
(deftest test-unicode-decoding
135135
(let [three-byte-char "\u20AC"
136-
text (apply str (repeat 10000 three-byte-char))
137-
text-bytes (.getBytes text "utf-8")]
138-
(is (bytes= text-bytes
139-
(.getBytes (convert text-bytes String) "utf-8")))
140-
(is (bytes= text-bytes
141-
(.getBytes (convert (ByteArrayInputStream. text-bytes) String) "utf-8")))
142-
(is (bytes= text-bytes
143-
(.getBytes (convert (ByteArrayInputStream. text-bytes) String {:chunk-size 100}) "utf-8")))))
136+
text (apply str (repeat 1e4 three-byte-char))
137+
text-bytes (to-byte-array text)]
138+
(is (bytes= text-bytes (-> text-bytes to-string to-byte-array)))
139+
(is (bytes= text-bytes (-> text-bytes to-input-stream to-string to-byte-array)))
140+
(is (bytes= text-bytes (-> text-bytes (to-input-stream {:chunk-size 128}) to-string to-byte-array)))))

0 commit comments

Comments
 (0)