Skip to content

Commit 0c3aa78

Browse files
committed
Fix for GetInnerText #155
1 parent 6ab4a63 commit 0c3aa78

File tree

4 files changed

+26
-84
lines changed

4 files changed

+26
-84
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ Released on tbd.
1212
- Fixed inclusion of CSS from stylesheets (#116, #140)
1313
- Fixed style empty if `text-align` is `start` (#151)
1414
- Fixed computation of priority in CSS rules using multi selector
15+
- Fixed `GetInnerText` multi-line / text node behavior (#155) @Seyden
1516
- Added further compactification of CSS tuples (#89, #93)
1617
- Added support for 8-digit hex color codes (#132)
1718
- Added more CSSOM possibilities and helpers (#6)

CONTRIBUTORS.md

+1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ AngleSharp.Css contains code written by (in order of first pull request / commit
1616
* [Bastian Buchholz](https://github.com/campersau)
1717
* [Fraaankes](https://github.com/Fraaankes)
1818
* [Eric Mutta](https://github.com/ericmutta)
19+
* [Seyden](https://github.com/Seyden)
1920

2021
Without these awesome people AngleSharp.Css could not exist. Thanks to everyone for your contributions! :beers:
2122

src/AngleSharp.Css.Tests/Extensions/InnerText.cs

+3
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,9 @@ public void SetInnerText(String fixture, String expectedInnerText, String expect
3737
// paragraph
3838
[TestCase("<p>test</p>", "test")]
3939
[TestCase("<p>test1</p><p>test2</p>", "test1\n\ntest2")]
40+
[TestCase("<p>test1</p>\n<p>test2</p>", "test1\n\ntest2")]
41+
[TestCase("<p>test1</p>\n \n <p>test2</p>", "test1\n\ntest2")]
42+
[TestCase("<p>test1</p>a\n \n b<p>test2</p>", "test1\n\na b\n\ntest2")]
4043
// block-level
4144
[TestCase("<div>test1</div><div>test2</div><div>test3</div>", "test1\ntest2\ntest3")]
4245
[TestCase(@"test1<span style=""display:block"">test2</span>test3", "test1\ntest2\ntest3")]

src/AngleSharp.Css/Extensions/ElementExtensions.cs

+21-84
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ private static void ItcInCssBox(ICssStyleDeclaration elementStyle, ICssStyleDecl
163163
{
164164
var elementHidden = new Nullable<Boolean>();
165165

166-
if (elementStyle != null)
166+
if (elementStyle is not null)
167167
{
168168
if (!String.IsNullOrEmpty(elementStyle.GetDisplay()))
169169
{
@@ -196,13 +196,13 @@ private static void ItcInCssBox(ICssStyleDeclaration elementStyle, ICssStyleDecl
196196
var lastLine = node.NextSibling is null ||
197197
String.IsNullOrEmpty(node.NextSibling.TextContent) ||
198198
node.NextSibling is IHtmlBreakRowElement;
199-
ProcessText(textElement.Data, sb, parentStyle, lastLine);
199+
ProcessText(textElement.Data, sb, parentStyle, lastLine, requiredLineBreakCounts);
200200
}
201201
else if (node is IHtmlBreakRowElement)
202202
{
203203
sb.Append(Symbols.LineFeed);
204204
}
205-
else if (elementStyle != null && ((node is IHtmlTableCellElement && String.IsNullOrEmpty(elementStyle.GetDisplay())) || elementStyle.GetDisplay() == CssKeywords.TableCell))
205+
else if (elementStyle is not null && ((node is IHtmlTableCellElement && String.IsNullOrEmpty(elementStyle.GetDisplay())) || elementStyle.GetDisplay() == CssKeywords.TableCell))
206206
{
207207
if (node.NextSibling is IElement nextSibling)
208208
{
@@ -214,7 +214,7 @@ private static void ItcInCssBox(ICssStyleDeclaration elementStyle, ICssStyleDecl
214214
}
215215
}
216216
}
217-
else if (elementStyle != null && ((node is IHtmlTableRowElement && String.IsNullOrEmpty(elementStyle.GetDisplay())) || elementStyle.GetDisplay() == CssKeywords.TableRow))
217+
else if (elementStyle is not null && ((node is IHtmlTableRowElement && String.IsNullOrEmpty(elementStyle.GetDisplay())) || elementStyle.GetDisplay() == CssKeywords.TableRow))
218218
{
219219
if (node.NextSibling is IElement nextSibling)
220220
{
@@ -243,7 +243,7 @@ private static void ItcInCssBox(ICssStyleDeclaration elementStyle, ICssStyleDecl
243243
}
244244
}
245245

246-
if (elementStyle != null)
246+
if (elementStyle is not null)
247247
{
248248
if (IsBlockLevelDisplay(elementStyle.GetDisplay()))
249249
{
@@ -297,105 +297,42 @@ public static IEnumerable<TElement> SetStyle<TElement>(this IEnumerable<TElement
297297

298298
private static Boolean HasCssBox(INode node)
299299
{
300-
switch (node.NodeName)
300+
return node.NodeName switch
301301
{
302-
case "CANVAS":
303-
case "COL":
304-
case "COLGROUP":
305-
case "DETAILS":
306-
case "FRAME":
307-
case "FRAMESET":
308-
case "IFRAME":
309-
case "IMG":
310-
case "INPUT":
311-
case "LINK":
312-
case "METER":
313-
case "PROGRESS":
314-
case "TEMPLATE":
315-
case "TEXTAREA":
316-
case "VIDEO":
317-
case "WBR":
318-
case "SCRIPT":
319-
case "STYLE":
320-
case "NOSCRIPT":
321-
return false;
322-
default:
323-
return true;
324-
}
302+
"CANVAS" or "COL" or "COLGROUP" or "DETAILS" or "FRAME" or "FRAMESET" or "IFRAME" or "IMG" or "INPUT" or "LINK" or "METER" or "PROGRESS" or "TEMPLATE" or "TEXTAREA" or "VIDEO" or "WBR" or "SCRIPT" or "STYLE" or "NOSCRIPT" => false,
303+
_ => true,
304+
};
325305
}
326306

327307
private static Boolean IsBlockLevelDisplay(String display)
328308
{
329309
// https://www.w3.org/TR/css-display-3/#display-value-summary
330310
// https://hg.mozilla.org/mozilla-central/file/0acceb224b7d/servo/components/layout/query.rs#l1016
331-
switch (display)
311+
return display switch
332312
{
333-
case "block":
334-
case "flow-root":
335-
case "flex":
336-
case "grid":
337-
case "table":
338-
case "table-caption":
339-
return true;
340-
default:
341-
return false;
342-
}
313+
"block" or "flow-root" or "flex" or "grid" or "table" or "table-caption" => true,
314+
_ => false,
315+
};
343316
}
344317

345318
private static Boolean IsBlockLevel(INode node)
346319
{
347320
// https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
348-
switch (node.NodeName)
321+
return node.NodeName switch
349322
{
350-
case "ADDRESS":
351-
case "ARTICLE":
352-
case "ASIDE":
353-
case "BLOCKQUOTE":
354-
case "CANVAS":
355-
case "DD":
356-
case "DIV":
357-
case "DL":
358-
case "DT":
359-
case "FIELDSET":
360-
case "FIGCAPTION":
361-
case "FIGURE":
362-
case "FOOTER":
363-
case "FORM":
364-
case "H1":
365-
case "H2":
366-
case "H3":
367-
case "H4":
368-
case "H5":
369-
case "H6":
370-
case "HEADER":
371-
case "GROUP":
372-
case "HR":
373-
case "LI":
374-
case "MAIN":
375-
case "NAV":
376-
case "NOSCRIPT":
377-
case "OL":
378-
case "OPTION":
379-
case "OUTPUT":
380-
case "P":
381-
case "PRE":
382-
case "SECTION":
383-
case "TABLE":
384-
case "TFOOT":
385-
case "UL":
386-
case "VIDEO":
387-
return true;
388-
default:
389-
return false;
390-
}
323+
"ADDRESS" or "ARTICLE" or "ASIDE" or "BLOCKQUOTE" or "CANVAS" or "DD" or "DIV" or "DL" or "DT" or "FIELDSET" or "FIGCAPTION" or "FIGURE" or "FOOTER" or "FORM" or "H1" or "H2" or "H3" or "H4" or "H5" or "H6" or "HEADER" or "GROUP" or "HR" or "LI" or "MAIN" or "NAV" or "NOSCRIPT" or "OL" or "OPTION" or "OUTPUT" or "P" or "PRE" or "SECTION" or "TABLE" or "TFOOT" or "UL" or "VIDEO" => true,
324+
_ => false,
325+
};
391326
}
392327

393-
private static void ProcessText(String text, StringBuilder sb, ICssStyleDeclaration style, Boolean lastLine)
328+
private static Boolean IsWhiteSpace(Char c) => Char.IsWhiteSpace(c) && c != Symbols.NoBreakSpace;
329+
330+
private static void ProcessText(String text, StringBuilder sb, ICssStyleDeclaration style, Boolean lastLine, Dictionary<Int32, Int32> requiredLineBreakCounts)
394331
{
395332
var startIndex = sb.Length;
396333
var whiteSpace = style?.GetWhiteSpace();
397334
var textTransform = style?.GetTextTransform();
398-
var isWhiteSpace = startIndex > 0 ? Char.IsWhiteSpace(sb[startIndex - 1]) && sb[startIndex - 1] != Symbols.NoBreakSpace : true;
335+
var isWhiteSpace = startIndex <= 0 || IsWhiteSpace(sb[startIndex - 1]) || (requiredLineBreakCounts.ContainsKey(startIndex) && IsWhiteSpace(text[0]));
399336

400337
for (var i = 0; i < text.Length; i++)
401338
{

0 commit comments

Comments
 (0)