Skip to content

Commit cab3502

Browse files
author
Navin
committed
Replace PostAnalyser module with a class
Encapsulate Post#cooked_document as well. Include specs for the PostAnalyzer class.
1 parent 0606725 commit cab3502

4 files changed

Lines changed: 280 additions & 30 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ config/discourse.pill
3535
# Ignore all logfiles and tempfiles.
3636
/log/*.log
3737
/tmp
38+
/logfile
3839

3940
# Ignore Eclipse .project file
4041
/.project

app/models/post.rb

Lines changed: 12 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,14 @@
44
require_dependency 'post_revisor'
55
require_dependency 'enum'
66
require_dependency 'trashable'
7-
require_dependency 'post_analyser'
7+
require_dependency 'post_analyzer'
88

99
require 'archetype'
1010
require 'digest/sha1'
1111

1212
class Post < ActiveRecord::Base
1313
include RateLimiter::OnCreateRecord
1414
include Trashable
15-
include PostAnalyser
1615

1716
versioned if: :raw_changed?
1817

@@ -90,11 +89,6 @@ def raw_hash
9089
Digest::SHA1.hexdigest(raw.gsub(/\s+/, "").downcase)
9190
end
9291

93-
def cooked_document
94-
self.cooked ||= cook(raw, topic_id: topic_id)
95-
@cooked_document ||= Nokogiri::HTML.fragment(cooked)
96-
end
97-
9892
def reset_cooked
9993
@cooked_document = nil
10094
self.cooked = nil
@@ -104,16 +98,18 @@ def self.white_listed_image_classes
10498
@white_listed_image_classes ||= ['avatar', 'favicon', 'thumbnail']
10599
end
106100

107-
# How many images are present in the post
108-
def image_count
109-
return 0 unless raw.present?
101+
# Builds a PostAnalyzer from this post's current raw text and topic id.
# The analyzer is stored in @post_analyzer, but because the assignment is
# unconditional a fresh instance is created (and returned) on every call.
def post_analyzer
  analyzer = PostAnalyzer.new(raw, topic_id)
  @post_analyzer = analyzer
end
110104

111-
cooked_document.search("img").reject do |t|
112-
dom_class = t["class"]
113-
if dom_class
114-
(Post.white_listed_image_classes & dom_class.split(" ")).count > 0
115-
end
116-
end.count
105+
# Analysis readers answered by a PostAnalyzer built from the post's current raw
# text. They go through #post_analyzer so there is a single place that decides
# how the analyzer is constructed, instead of repeating the constructor call.
%w[raw_mentions linked_hosts image_count link_count raw_links].each do |attr|
  define_method(attr) do
    # public_send: only the analyzer's public API is meant to be exposed here.
    post_analyzer.public_send(attr)
  end
end
110+
111+
# Cooks the given arguments via this post's analyzer (see PostAnalyzer#cook).
# All arguments are forwarded untouched; the cooked HTML string is returned.
# Uses #post_analyzer rather than constructing a PostAnalyzer inline so the
# analyzer is built the same way as for the delegated analysis readers.
def cook(*args)
  post_analyzer.cook(*args)
end
118114

119115

@@ -236,20 +232,6 @@ def excerpt(maxlength = nil, options = {})
236232
Post.excerpt(cooked, maxlength, options)
237233
end
238234

239-
# What we use to cook posts
240-
def cook(*args)
241-
cooked = PrettyText.cook(*args)
242-
243-
# If we have any of the oneboxes in the cache, throw them in right away, don't
244-
# wait for the post processor.
245-
dirty = false
246-
result = Oneboxer.apply(cooked) do |url, elem|
247-
Oneboxer.render_from_cache(url)
248-
end
249-
250-
cooked = result.to_html if result.changed?
251-
cooked
252-
end
253235

254236
# A list of versions including the initial version
255237
def all_versions

app/models/post_analyzer.rb

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
# Analyzes the raw text of a post: cooks it to HTML and extracts the mentions,
# links, linked hosts and image count from it.
#
# Derived results are memoized per instance and discarded when #raw= is called.
class PostAnalyzer

  attr_accessor :cooked
  attr_reader :raw

  # raw      - the raw (uncooked) text of the post
  # topic_id - passed to the cooker as the :topic_id option
  def initialize(raw, topic_id)
    @raw = raw
    @topic_id = topic_id
  end

  # Replaces the raw text and drops every memoized result derived from the old
  # text, so later calls analyze the new value. @cooked is left untouched until
  # the next call to #cooked_document.
  def raw=(value)
    @raw = value
    @cooked_document = nil
    @raw_mentions = nil
    @raw_links = nil
    @linked_hosts = nil
  end

  # The cooked post parsed as a Nokogiri HTML fragment. Cooking and parsing
  # happen once per raw value; @cooked is set to the cooked HTML as a side effect.
  def cooked_document
    @cooked_document ||= begin
      @cooked = cook(@raw, topic_id: @topic_id)
      Nokogiri::HTML.fragment(@cooked)
    end
  end

  # What we use to cook posts.
  # Arguments are forwarded to PrettyText.cook; returns the cooked HTML string.
  def cook(*args)
    cooked = PrettyText.cook(*args)

    # If we have any of the oneboxes in the cache, throw them in right away, don't
    # wait for the post processor.
    result = Oneboxer.apply(cooked) do |url, _elem|
      Oneboxer.render_from_cache(url)
    end

    result.changed? ? result.to_html : cooked
  end

  # How many images are present in the post. Images carrying any of
  # Post.white_listed_image_classes are not counted. Returns 0 for blank raw.
  def image_count
    return 0 unless @raw.present?

    cooked_document.search("img").count do |img|
      dom_class = img["class"]
      dom_class.nil? || (Post.white_listed_image_classes & dom_class.split(" ")).empty?
    end
  end

  # Lowercased, de-duplicated usernames mentioned in the raw text, without the
  # leading "@". Mentions inside quotes, <pre> and <code> are ignored.
  # Returns [] for blank raw.
  def raw_mentions
    return [] if @raw.blank?
    return @raw_mentions unless @raw_mentions.nil?

    # We don't count mentions in quotes
    raw_stripped = @raw.gsub(/\[quote=(.*)\]([^\[]*?)\[\/quote\]/im, '')

    # Strip pre and code tags
    doc = Nokogiri::HTML.fragment(raw_stripped)
    doc.search("pre").remove
    doc.search("code").remove

    results = doc.to_html.scan(PrettyText.mention_matcher)
    # Non-destructive sub: the bang variant returns nil when nothing is
    # replaced, which would put nil into the result.
    @raw_mentions = results.uniq.map { |un| un.first.downcase.sub(/\A@/, '') }
  end

  # The distinct hosts linked in the post, as a Hash of host => 1. The value
  # only marks presence; it is not a per-host tally. Links whose href cannot be
  # parsed as a URI, or that have no host (e.g. relative links), are skipped.
  def linked_hosts
    return {} if raw_links.blank?
    return @linked_hosts unless @linked_hosts.nil?

    @linked_hosts = raw_links.each_with_object({}) do |url, hosts|
      host = host_for(url)
      hosts[host] ||= 1 unless host.nil?
    end
  end

  # Returns an array of all links in a post excluding mentions
  def raw_links
    return [] unless @raw.present?
    return @raw_links unless @raw_links.nil?

    # Don't include @mentions in the link count
    anchors = cooked_document.search("a[href]").reject { |l| link_is_a_mention?(l) }
    @raw_links = anchors.map { |l| l.attributes['href'].to_s }
  end

  # How many links are present in the post
  def link_count
    raw_links.size
  end

  private

  # True-ish when the anchor has exactly the class "mention" and its href
  # starts with /users/.
  def link_is_a_mention?(l)
    html_class = l.attributes['class']
    return false if html_class.nil?
    html_class.to_s == 'mention' && l.attributes['href'].to_s =~ /^\/users\//
  end

  # Host part of url, or nil when the url is not a valid URI or has no host.
  def host_for(url)
    URI.parse(url).host
  rescue URI::InvalidURIError
    nil
  end
end

spec/models/post_analyzer_spec.rb

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
require 'spec_helper'
2+
3+
# Specs for PostAnalyzer: link, host, image and mention extraction from raw post text.
describe PostAnalyzer do

  let(:topic) { Fabricate(:topic) }
  let(:default_topic_id) { topic.id }

  context "links" do
    let(:raw_no_links) { "hello world my name is evil trout" }
    let(:raw_one_link_md) { "[jlawr](http://www.imdb.com/name/nm2225369)" }
    let(:raw_two_links_html) { "<a href='http://disneyland.disney.go.com/'>disney</a> <a href='http://reddit.com'>reddit</a>"}
    let(:raw_three_links) { "http://discourse.org and http://discourse.org/another_url and http://www.imdb.com/name/nm2225369"}

    describe "raw_links" do
      it "returns a blank collection for a post with no links" do
        post_analyzer = PostAnalyzer.new(raw_no_links, default_topic_id)
        post_analyzer.raw_links.should be_blank
      end

      it "finds a link within markdown" do
        post_analyzer = PostAnalyzer.new(raw_one_link_md, default_topic_id)
        post_analyzer.raw_links.should == ["http://www.imdb.com/name/nm2225369"]
      end

      it "can find two links from html" do
        post_analyzer = PostAnalyzer.new(raw_two_links_html, default_topic_id)
        post_analyzer.raw_links.should == ["http://disneyland.disney.go.com/", "http://reddit.com"]
      end

      it "can find three links without markup" do
        post_analyzer = PostAnalyzer.new(raw_three_links, default_topic_id)
        post_analyzer.raw_links.should == ["http://discourse.org", "http://discourse.org/another_url", "http://www.imdb.com/name/nm2225369"]
      end
    end

    describe "linked_hosts" do
      it "returns blank with no links" do
        post_analyzer = PostAnalyzer.new(raw_no_links, default_topic_id)
        post_analyzer.linked_hosts.should be_blank
      end

      it "returns the host and a count for links" do
        post_analyzer = PostAnalyzer.new(raw_two_links_html, default_topic_id)
        post_analyzer.linked_hosts.should == {"disneyland.disney.go.com" => 1, "reddit.com" => 1}
      end

      # Each host maps to 1 no matter how many links point at it.
      it "lists a host once when more than one link is on the same host" do
        post_analyzer = PostAnalyzer.new(raw_three_links, default_topic_id)
        post_analyzer.linked_hosts.should == {"discourse.org" => 1, "www.imdb.com" => 1}
      end
    end
  end

  describe "image_count" do
    let(:raw_post_one_image_md) { "![sherlock](http://bbc.co.uk/sherlock.jpg)" }
    let(:raw_post_two_images_html) { "<img src='http://discourse.org/logo.png'> <img src='http://bbc.co.uk/sherlock.jpg'>" }
    let(:raw_post_with_avatars) { '<img alt="smiley" title=":smiley:" src="/assets/emoji/smiley.png" class="avatar"> <img alt="wink" title=":wink:" src="/assets/emoji/wink.png" class="avatar">' }
    let(:raw_post_with_favicon) { '<img src="/assets/favicons/wikipedia.png" class="favicon">' }
    let(:raw_post_with_thumbnail) { '<img src="/assets/emoji/smiley.png" class="thumbnail">' }
    let(:raw_post_with_two_classy_images) { "<img src='http://discourse.org/logo.png' class='classy'> <img src='http://bbc.co.uk/sherlock.jpg' class='classy'>" }

    it "returns 0 images for an empty post" do
      post_analyzer = PostAnalyzer.new("", nil)
      post_analyzer.image_count.should == 0
    end

    it "returns 0 images for a post without images" do
      post_analyzer = PostAnalyzer.new("Hello world", nil)
      post_analyzer.image_count.should == 0
    end

    it "finds images from markdown" do
      post_analyzer = PostAnalyzer.new(raw_post_one_image_md, default_topic_id)
      post_analyzer.image_count.should == 1
    end

    it "finds images from HTML" do
      post_analyzer = PostAnalyzer.new(raw_post_two_images_html, default_topic_id)
      post_analyzer.image_count.should == 2
    end

    it "doesn't count avatars as images" do
      post_analyzer = PostAnalyzer.new(raw_post_with_avatars, default_topic_id)
      post_analyzer.image_count.should == 0
    end

    it "doesn't count favicons as images" do
      post_analyzer = PostAnalyzer.new(raw_post_with_favicon, default_topic_id)
      post_analyzer.image_count.should == 0
    end

    it "doesn't count thumbnails as images" do
      post_analyzer = PostAnalyzer.new(raw_post_with_thumbnail, default_topic_id)
      post_analyzer.image_count.should == 0
    end

    it "doesn't count whitelisted images" do
      Post.stubs(:white_listed_image_classes).returns(["classy"])
      post_analyzer = PostAnalyzer.new(raw_post_with_two_classy_images, default_topic_id)
      post_analyzer.image_count.should == 0
    end
  end

  describe "link_count" do
    let(:raw_post_one_link_md) { "[sherlock](http://www.bbc.co.uk/programmes/b018ttws)" }
    let(:raw_post_two_links_html) { "<a href='http://discourse.org'>discourse</a> <a href='http://twitter.com'>twitter</a>" }
    let(:raw_post_with_mentions) { "hello @novemberkilo how are you doing?" }

    it "returns 0 links for an empty post" do
      post_analyzer = PostAnalyzer.new("", nil)
      post_analyzer.link_count.should == 0
    end

    it "returns 0 links for a post without links" do
      post_analyzer = PostAnalyzer.new("Hello world", nil)
      post_analyzer.link_count.should == 0
    end

    it "returns 0 links for a post with mentions" do
      post_analyzer = PostAnalyzer.new(raw_post_with_mentions, default_topic_id)
      post_analyzer.link_count.should == 0
    end

    it "finds links from markdown" do
      post_analyzer = PostAnalyzer.new(raw_post_one_link_md, default_topic_id)
      post_analyzer.link_count.should == 1
    end

    it "finds links from HTML" do
      post_analyzer = PostAnalyzer.new(raw_post_two_links_html, default_topic_id)
      post_analyzer.link_count.should == 2
    end
  end


  describe "raw_mentions" do

    it "returns an empty array for an empty post" do
      post_analyzer = PostAnalyzer.new("", default_topic_id)
      post_analyzer.raw_mentions.should == []
    end

    it "returns an empty array with no matches" do
      post_analyzer = PostAnalyzer.new("Hello Jake and Finn!", default_topic_id)
      post_analyzer.raw_mentions.should == []
    end

    it "returns lowercase unique versions of the mentions" do
      post_analyzer = PostAnalyzer.new("@Jake @Finn @Jake", default_topic_id)
      post_analyzer.raw_mentions.should == ['jake', 'finn']
    end

    it "ignores pre" do
      post_analyzer = PostAnalyzer.new("<pre>@Jake</pre> @Finn", default_topic_id)
      post_analyzer.raw_mentions.should == ['finn']
    end

    it "catches content between pre tags" do
      post_analyzer = PostAnalyzer.new("<pre>hello</pre> @Finn <pre></pre>", default_topic_id)
      post_analyzer.raw_mentions.should == ['finn']
    end

    it "ignores code" do
      post_analyzer = PostAnalyzer.new("@Jake <code>@Finn</code>", default_topic_id)
      post_analyzer.raw_mentions.should == ['jake']
    end

    it "ignores quotes" do
      post_analyzer = PostAnalyzer.new("[quote=\"Evil Trout\"]@Jake[/quote] @Finn", default_topic_id)
      post_analyzer.raw_mentions.should == ['finn']
    end

    it "handles underscore in username" do
      post_analyzer = PostAnalyzer.new("@Jake @Finn @Jake_Old", default_topic_id)
      post_analyzer.raw_mentions.should == ['jake', 'finn', 'jake_old']
    end
  end
end

0 commit comments

Comments
 (0)