Skip to content

Commit f06170a

Browse files
authored
feat: parse html content (#4)
1 parent 02d2074 commit f06170a

File tree

6 files changed

+219
-2
lines changed

lib/sources/factory.js

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ const _ = require('lodash')
66

77
const SimpleSource = require('./simple-source')
88
const JsSource = require('./js-source')
9+
const HtmlSource = require('./html-source')
910

1011
const FILE_PREFIX = 'file://'
1112

@@ -66,6 +67,8 @@ class SourceFactory {
6667
static _createSource(file, opts) {
6768
if (file.type === 'js' && !opts.simple) {
6869
return new JsSource(file.content)
70+
} else if (file.type === 'html' && !opts.simple) {
71+
return new HtmlSource(file.content)
6972
}
7073

7174
return new SimpleSource(file.content)

lib/sources/html-source.js

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
const _ = require('lodash')
2+
const HtmlParser = require('htmlparser2').Parser
3+
4+
const debug = require('debug')('nukecss:html-source')
5+
const SimpleSource = require('./simple-source')
6+
7+
/**
 * Source backed by HTML markup.
 *
 * The markup is reduced to a set of tokens (tag names plus the values of
 * `id` and `class` attributes found on opening tags) and selectors are looked
 * up against those tokens. If tokenizing throws, the instance keeps only the
 * raw text and `contains` falls back to a plain text search.
 */
class HtmlSource {
  /**
   * @param {string} text - Raw HTML markup.
   * @param {Object} [opts]
   * @param {Iterable<string>} [opts.tokens] - Pre-computed tokens. When
   *   given, `text` is stored but not parsed.
   */
  constructor(text, opts = {}) {
    this._text = text

    let tokens = opts.tokens
    if (!tokens) {
      try {
        tokens = HtmlSource.tokenizeHtml(text)
      } catch (err) {
        // Best effort: log the failure and leave `_tokens` unset so that
        // `contains` searches the raw text instead.
        debug(err)
        return
      }
    }

    // `contains` relies on `Set#has`, so accept any iterable of tokens
    // (e.g. an array) and normalize it here.
    this._tokens = tokens instanceof Set ? tokens : new Set(tokens)
    this._tokensArray = Array.from(this._tokens)
  }

  /**
   * @returns {string} Always `'html'`.
   */
  get type() {
    return 'html'
  }

  /**
   * Checks whether the selector is used by this source.
   *
   * @param {string} selector - Name to look for.
   * @returns {boolean} With tokens available: true when `selector` is exactly
   *   one of the tokens, or when `SimpleSource.textContains` matches it inside
   *   any token (a `class` attribute holding several class names is stored as
   *   a single token). Without tokens: the result of
   *   `SimpleSource.textContains` on the raw text.
   */
  contains(selector) {
    if (!this._tokens) {
      return SimpleSource.textContains(this._text, selector)
    }

    return this._tokens.has(selector) ||
      this._tokensArray.some(token => SimpleSource.textContains(token, selector))
  }

  /**
   * Combines this source with another HTML source.
   *
   * @param {HtmlSource} that - Source to append; its `type` must be `'html'`.
   * @returns {HtmlSource} New source holding the union of both token sets and
   *   both texts separated by an HTML comment.
   * @throws {Error} When `that` is missing or is not an HTML source.
   */
  join(that) {
    if (!that || that.type !== 'html') {
      throw new Error('HtmlSource can only be joined with HtmlSource')
    }

    // A side whose tokenizing failed has no `_tokensArray` and contributes
    // no tokens.
    // NOTE(review): the joined source always carries a token set, so text
    // from a side that failed to tokenize is no longer searched - confirm
    // this is intended.
    const thisTokens = this._tokensArray || []
    const thatTokens = that._tokensArray || []
    const tokens = new Set(thisTokens.concat(thatTokens))
    const joiner = '\n<!-- joined by nukecss -->\n'
    return new HtmlSource(`${this._text}${joiner}${that._text}`, {tokens})
  }

  /**
   * Collects the tokens of an HTML document.
   *
   * @param {string} text - Raw HTML markup.
   * @returns {Set<string>} Tag name, `id` value and `class` value of every
   *   opening tag; attributes that are absent are skipped.
   */
  static tokenizeHtml(text) {
    const tokens = new Set()

    const parser = new HtmlParser({
      onopentag(name, attributes) {
        const candidates = [name, attributes.id, attributes.class]
        candidates
          .filter(candidate => typeof candidate === 'string')
          .forEach(candidate => tokens.add(candidate))
      },
    }, {decodeEntities: true})

    parser.write(text)
    parser.end()
    return tokens
  }
}
70+
71+
module.exports = HtmlSource

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
"esprima": "^3.1.3",
5252
"glob": "^7.1.1",
5353
"gonzales-pe": "^4.0.3",
54+
"htmlparser2": "^3.9.2",
5455
"lodash": "^4.17.4",
5556
"postcss": "^5.2.15"
5657
},

test/sources/factory.test.js

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,14 +75,22 @@ describe('sources/factory.js', () => {
7575
expect(sources).to.have.length(2)
7676
})
7777

78-
it('should use the proper source', () => {
78+
it('should use JsSource', () => {
7979
const content = 'const foobar = "baz"'
8080
const sources = SourceFactory.fromObject({content, type: 'js'})
8181
expect(sources).to.have.length(1)
8282
expect(sources[0].contains('foobar')).to.equal(false)
8383
expect(sources[0].contains('baz')).to.equal(true)
8484
})
8585

86+
it('should use HtmlSource', () => {
87+
const content = '<html><p class="baz">foobar</p></html>'
88+
const sources = SourceFactory.fromObject({content, type: 'html'})
89+
expect(sources).to.have.length(1)
90+
expect(sources[0].contains('foobar')).to.equal(false)
91+
expect(sources[0].contains('baz')).to.equal(true)
92+
})
93+
8694
it('should infer the proper source type from the extension', () => {
8795
const filePath = path.join(__dirname, '../fixtures/content.js')
8896
const sources = SourceFactory.fromObject({path: filePath})

test/sources/html-source.test.js

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
const SimpleSource = require('../../lib/sources/simple-source.js')
2+
const HtmlSource = require('../../lib/sources/html-source.js')
3+
4+
describe('sources/html-source.js', () => {
5+
it('should return the type', () => {
6+
expect(new HtmlSource('<html></html>')).to.have.property('type', 'html')
7+
})
8+
9+
describe('#join', () => {
10+
it('should join to another HtmlSource', () => {
11+
const sourceA = new HtmlSource('<html></html>')
12+
const sourceB = new HtmlSource('<html></html>')
13+
expect(sourceA.join(sourceB)).to.have.property('type', 'html')
14+
expect(sourceB.join(sourceA)).to.have.property('type', 'html')
15+
})
16+
17+
it('should join to a malformed HtmlSource', () => {
18+
const sourceA = new HtmlSource('<html></html>')
19+
const sourceB = new HtmlSource('<html')
20+
expect(sourceA.join(sourceB)).to.have.property('type', 'html')
21+
expect(sourceB.join(sourceA)).to.have.property('type', 'html')
22+
})
23+
24+
it('should not join to another non-HtmlSource', () => {
25+
const sourceA = new HtmlSource('<html></html>')
26+
const sourceB = new SimpleSource('other content')
27+
expect(() => sourceA.join(sourceB)).to.throw()
28+
expect(() => sourceB.join(sourceA)).to.throw()
29+
})
30+
})
31+
32+
describe('#contains', () => {
33+
context('when html is simple', () => {
34+
const html = `
35+
<!DOCTYPE html>
36+
<html lang="en">
37+
<head>
38+
<title>Basic HTML Example</title>
39+
<link href="app.css" rel="stylesheet">
40+
<script src="app.js" type="text/javascript"></script>
41+
</head>
42+
<body>
43+
<div id="my-hero-element" class="container">
44+
<div class="several classes in-a-row">
45+
<h1>My Header</h1>
46+
<p class="lead">Examplelongtext</p>
47+
</div>
48+
</div>
49+
<script>
50+
const myJsVar = "my-javascript-class"
51+
</script>
52+
</body>
53+
</html>
54+
`
55+
56+
const source = new HtmlSource(html)
57+
58+
it('should find tokens as elements', () => {
59+
expect(source.contains('div')).to.equal(true)
60+
expect(source.contains('h1')).to.equal(true)
61+
expect(source.contains('p')).to.equal(true)
62+
expect(source.contains('script')).to.equal(true)
63+
})
64+
65+
it('should find tokens as classes', () => {
66+
expect(source.contains('container')).to.equal(true)
67+
expect(source.contains('lead')).to.equal(true)
68+
})
69+
70+
it('should find tokens as multiple classes', () => {
71+
expect(source.contains('several')).to.equal(true)
72+
expect(source.contains('classes')).to.equal(true)
73+
expect(source.contains('in-a-row')).to.equal(true)
74+
})
75+
76+
it('should find tokens as identifiers', () => {
77+
expect(source.contains('my-hero-element')).to.equal(true)
78+
})
79+
80+
// The fixture carries these words only in `rel` and `type` attribute values,
// so neither is expected to be reported as contained.
it('should not find tokens as other attributes', () => {
  expect(source.contains('stylesheet')).to.equal(false)
  expect(source.contains('javascript')).to.equal(false)
})
84+
85+
it('should not find tokens as text', () => {
86+
expect(source.contains('Header')).to.equal(false)
87+
expect(source.contains('examplelongtext')).to.equal(false)
88+
})
89+
})
90+
})
91+
})

yarn.lock

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -593,6 +593,34 @@ doctrine@1.5.0, doctrine@^1.2.2:
593593
esutils "^2.0.2"
594594
isarray "^1.0.0"
595595

596+
dom-serializer@0:
597+
version "0.1.0"
598+
resolved "https://registry.yarnpkg.com/dom-serializer/-/dom-serializer-0.1.0.tgz#073c697546ce0780ce23be4a28e293e40bc30c82"
599+
dependencies:
600+
domelementtype "~1.1.1"
601+
entities "~1.1.1"
602+
603+
domelementtype@1, domelementtype@^1.3.0:
604+
version "1.3.0"
605+
resolved "https://registry.yarnpkg.com/domelementtype/-/domelementtype-1.3.0.tgz#b17aed82e8ab59e52dd9c19b1756e0fc187204c2"
606+
607+
domelementtype@~1.1.1:
608+
version "1.1.3"
609+
resolved "https://registry.yarnpkg.com/domelementtype/-/domelementtype-1.1.3.tgz#bd28773e2642881aec51544924299c5cd822185b"
610+
611+
domhandler@^2.3.0:
612+
version "2.3.0"
613+
resolved "https://registry.yarnpkg.com/domhandler/-/domhandler-2.3.0.tgz#2de59a0822d5027fabff6f032c2b25a2a8abe738"
614+
dependencies:
615+
domelementtype "1"
616+
617+
domutils@^1.5.1:
618+
version "1.5.1"
619+
resolved "https://registry.yarnpkg.com/domutils/-/domutils-1.5.1.tgz#dcd8488a26f563d61079e48c9f7b7e32373682cf"
620+
dependencies:
621+
dom-serializer "0"
622+
domelementtype "1"
623+
596624
dot-prop@^3.0.0:
597625
version "3.0.0"
598626
resolved "https://registry.yarnpkg.com/dot-prop/-/dot-prop-3.0.0.tgz#1b708af094a49c9a0e7dbcad790aba539dac1177"
@@ -621,6 +649,10 @@ enhance-visitors@^1.0.0:
621649
dependencies:
622650
lodash "^4.13.1"
623651

652+
entities@^1.1.1, entities@~1.1.1:
653+
version "1.1.1"
654+
resolved "https://registry.yarnpkg.com/entities/-/entities-1.1.1.tgz#6e5c2d0a5621b5dadaecef80b90edfb5cd7772f0"
655+
624656
error-ex@^1.2.0:
625657
version "1.3.0"
626658
resolved "https://registry.yarnpkg.com/error-ex/-/error-ex-1.3.0.tgz#e67b43f3e82c96ea3a584ffee0b9fc3325d802d9"
@@ -1240,6 +1272,17 @@ hosted-git-info@^2.1.4, hosted-git-info@^2.1.5:
12401272
version "2.2.0"
12411273
resolved "https://registry.yarnpkg.com/hosted-git-info/-/hosted-git-info-2.2.0.tgz#7a0d097863d886c0fabbdcd37bf1758d8becf8a5"
12421274

1275+
htmlparser2@^3.9.2:
1276+
version "3.9.2"
1277+
resolved "https://registry.yarnpkg.com/htmlparser2/-/htmlparser2-3.9.2.tgz#1bdf87acca0f3f9e53fa4fcceb0f4b4cbb00b338"
1278+
dependencies:
1279+
domelementtype "^1.3.0"
1280+
domhandler "^2.3.0"
1281+
domutils "^1.5.1"
1282+
entities "^1.1.1"
1283+
inherits "^2.0.1"
1284+
readable-stream "^2.0.2"
1285+
12431286
http-signature@~1.1.0:
12441287
version "1.1.1"
12451288
resolved "https://registry.yarnpkg.com/http-signature/-/http-signature-1.1.1.tgz#df72e267066cd0ac67fb76adf8e134a8fbcf91bf"
@@ -1269,7 +1312,7 @@ inflight@^1.0.4:
12691312
once "^1.3.0"
12701313
wrappy "1"
12711314

1272-
inherits@2, inherits@^2.0.3, inherits@~2.0.0, inherits@~2.0.1:
1315+
inherits@2, inherits@^2.0.1, inherits@^2.0.3, inherits@~2.0.0, inherits@~2.0.1:
12731316
version "2.0.3"
12741317
resolved "https://registry.yarnpkg.com/inherits/-/inherits-2.0.3.tgz#633c2c83e3da42a502f52466022480f4208261de"
12751318

0 commit comments

Comments
 (0)