Skip to content

Commit 4536b5f

Browse files
committed
added a job to clean up orphan uploads
1 parent cf08d2c commit 4536b5f

7 files changed

Lines changed: 86 additions & 19 deletions

File tree

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
module Jobs

  # Scheduled job that looks for orphan uploads: uploads created before the
  # grace period that are referenced neither through PostUpload nor as a
  # user's uploaded avatar.
  #
  # The removal itself is still commented out, so the job currently only
  # walks over the candidates without touching them.
  class CleanUpUploads < Jobs::Scheduled
    recurrence { hourly }

    # args - job arguments; not read by this job.
    def execute(args)
      ids_referenced_by_posts = PostUpload.uniq.pluck(:upload_id)
      ids_referenced_by_avatars = User.uniq.where('uploaded_avatar_id IS NOT NULL').pluck(:uploaded_avatar_id)
      referenced_ids = ids_referenced_by_posts + ids_referenced_by_avatars

      # the grace period never drops below one hour, whatever the setting says
      hours_of_grace = [SiteSetting.uploads_grace_period_in_hours, 1].max

      # NOTE(review): with an empty id list Rails is expected to bind NULL
      # into "NOT IN (?)", which would match no rows at all -- confirm this
      # is the intended behaviour before re-enabling the destroy below.
      orphans = Upload.where("created_at < ?", hours_of_grace.hour.ago)
                      .where("id NOT IN (?)", referenced_ids)

      orphans.find_each do |orphan|
        # disable this for now.
        #orphan.destroy
      end
    end

  end

end

app/models/site_setting.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,7 @@ class SiteSetting < ActiveRecord::Base
184184
setting(:enforce_global_nicknames, true)
185185
setting(:discourse_org_access_key, '')
186186

187+
setting(:uploads_grace_period_in_hours, 1)
187188
setting(:enable_s3_uploads, false)
188189
setting(:s3_access_key_id, '')
189190
setting(:s3_secret_access_key, '')

config/locales/server.en.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -619,6 +619,7 @@ en:
619619

620620
suggested_topics: "Number of suggested topics shown at the bottom of a topic"
621621

622+
uploads_grace_period_in_hours: "Grace period (in hours) before an orphan upload is removed."
622623
enable_s3_uploads: "Place uploads on Amazon S3"
623624
s3_upload_bucket: "The Amazon S3 bucket name that files will be uploaded into. WARNING: must be lowercase (cf. http://docs.aws.amazon.com/AmazonS3/latest/dev/BucketRestrictions.html)"
624625
s3_access_key_id: "The Amazon S3 access key id that will be used to upload images"
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# Rebuilds the post_uploads reverse index (which upload is used by which post)
# from scratch by scanning the cooked HTML of every post.
class BackfillPostUploadReverseIndex < ActiveRecord::Migration

  # Empties post_uploads, then re-inserts one row per (upload, post) pair
  # found in the <img src> and <a href> attributes of each post.
  def up
    # clean the reverse index
    execute "TRUNCATE TABLE post_uploads"

    # fill the reverse index up
    Post.select([:id, :cooked]).find_each do |post|
      doc = Nokogiri::HTML::fragment(post.cooked)
      # images
      doc.search("img").each { |img| add_to_reverse_index(img['src'], post.id) }
      # thumbnails and/or attachments
      doc.search("a").each { |a| add_to_reverse_index(a['href'], post.id) }
    end
  end

  # Inserts a post_uploads row linking post_id to the upload whose url matches
  # the given url, unless that row already exists.
  #
  # url     - value of a src/href attribute taken from the cooked post; may be
  #           nil or blank, in which case nothing happens.
  # post_id - id of the post the url was found in.
  def add_to_reverse_index(url, post_id)
    # make sure we have a url to insert
    return unless url.present?
    # local uploads are relative
    if index = url.index(local_base_url)
      url = url[index..-1]
    end
    # filter out non-uploads
    return unless url.starts_with?(local_base_url) || url.starts_with?(s3_base_url)

    # The url comes from user-authored post content, so it must never be
    # interpolated raw into the statement: let the adapter quote/escape it.
    quoted_url = ActiveRecord::Base.connection.quote(url)
    safe_post_id = post_id.to_i

    # update the reverse index
    execute "INSERT INTO post_uploads (upload_id, post_id)
             SELECT u.id, #{safe_post_id}
               FROM uploads u
              WHERE u.url = #{quoted_url}
                AND NOT EXISTS (SELECT 1 FROM post_uploads WHERE upload_id = u.id AND post_id = #{safe_post_id})"
  end

  # Prefix of local upload urls for the current multisite database (memoized).
  def local_base_url
    @local_base_url ||= "/uploads/#{RailsMultisite::ConnectionManagement.current_db}"
  end

  # Protocol-relative prefix of upload urls in the configured S3 bucket (memoized).
  def s3_base_url
    @s3_base_url ||= "//#{SiteSetting.s3_upload_bucket.downcase}.s3.amazonaws.com"
  end

end

lib/cooked_post_processor.rb

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,16 @@ def initialize(post, opts={})
1616
end
1717

1818
# Runs every post-processing step in a fixed order. The reverse index rows of
# the post are removed first, then attachments, images and oneboxes are
# processed (presumably the later steps re-create the index rows -- confirm).
def post_process
  clean_up_reverse_index
  post_process_attachments
  post_process_images
  post_process_oneboxes
end
2324

25+
# Deletes every PostUpload row that references the post being processed.
def clean_up_reverse_index
  current_post_id = @post.id
  PostUpload.delete_all(post_id: current_post_id)
end
28+
2429
def post_process_attachments
2530
attachments.each do |attachment|
2631
href = attachment['href']

lib/tasks/images.rake

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -10,25 +10,6 @@ task "images:compress" => :environment do
1010
end
1111
end
1212

13-
desc "clean orphan uploaded files"
task "images:clean_orphans" => :environment do
  RailsMultisite::ConnectionManagement.each_connection do |db|
    puts "Cleaning up #{db}"

    # lightweight safety net to prevent users from wiping all their uploads out:
    # an empty reverse index next to existing uploads means it was never built
    if PostUpload.count == 0 && Upload.count > 0
      puts "The reverse index is empty. Make sure you run the `images:reindex` task"
      next
    end

    # uploads that have no matching row in post_uploads
    orphans = Upload.joins("LEFT OUTER JOIN post_uploads ON uploads.id = post_uploads.upload_id")
                    .where("post_uploads.upload_id IS NULL")

    orphans.find_each do |orphan|
      orphan.destroy
      putc "."
    end
  end
  puts "\ndone."
end
31-
3213
desc "download all hotlinked images"
3314
task "images:pull_hotlinked" => :environment do
3415
RailsMultisite::ConnectionManagement.each_connection do |db|

spec/components/cooked_post_processor_spec.rb

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
let(:post_process) { sequence("post_process") }
1111

1212
it "post process in sequence" do
13+
cpp.expects(:clean_up_reverse_index).in_sequence(post_process)
1314
cpp.expects(:post_process_attachments).in_sequence(post_process)
1415
cpp.expects(:post_process_images).in_sequence(post_process)
1516
cpp.expects(:post_process_oneboxes).in_sequence(post_process)
@@ -18,6 +19,18 @@
1819

1920
end
2021

22+
# clean_up_reverse_index must delete the PostUpload rows of the processed post
context "clean_up_reverse_index" do

  let(:post) { build(:post) }
  let(:cpp) { CookedPostProcessor.new(post) }

  it "cleans the reverse index up for the current post" do
    # the deletion has to be scoped to this post's id
    PostUpload.expects(:delete_all).with(post_id: post.id)
    cpp.clean_up_reverse_index
  end

end
33+
2134
context "post_process_attachments" do
2235

2336
context "with attachment" do

0 commit comments

Comments
 (0)