Skip to content

Commit 2ada04e

Browse files
committed
refactor: extract scrubber logic into composable concerns
The three concerns are:

- fragment parsing
- scrubbing
- serialization

These are combined in a fourth concern which implements a `#sanitize` method that composes the other concerns like:

    serialize(scrub(parse_fragment(html)))

This should enable us to easily add HTML5 fragment parsing in a subsequent commit.
1 parent 836becd commit 2ada04e

File tree

2 files changed

+169
-151
lines changed

2 files changed

+169
-151
lines changed

lib/rails/html/sanitizer.rb

Lines changed: 168 additions & 150 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
module Rails
44
module Html
5-
class Sanitizer # :nodoc:
5+
class Sanitizer
66
class << self
77
def full_sanitizer
88
Rails::Html::FullSanitizer
@@ -36,33 +36,175 @@ def properly_encode(fragment, options)
3636
end
3737
end
3838

39-
# === Rails::Html::FullSanitizer
40-
# Removes all tags but strips out scripts, forms and comments.
41-
#
42-
# full_sanitizer = Rails::Html::FullSanitizer.new
43-
# full_sanitizer.sanitize("<b>Bold</b> no more! <a href='more.html'>See more here</a>...")
44-
# # => Bold no more! See more here...
45-
class FullSanitizer < Sanitizer
46-
def parse_fragment(html)
47-
Loofah.fragment(html)
39+
module Concern # :nodoc:
40+
module ComposedSanitize # :nodoc:
41+
def sanitize(html, options = {})
42+
return unless html
43+
return html if html.empty?
44+
45+
serialize(scrub(parse_fragment(html), options))
46+
end
4847
end
4948

50-
def scrub(fragment)
51-
fragment.scrub!(TextOnlyScrubber.new)
49+
module Parser # :nodoc:
50+
module Html4 # :nodoc:
51+
def parse_fragment(html)
52+
Loofah.html4_fragment(html)
53+
end
54+
end
5255
end
5356

54-
def serialize(fragment)
55-
properly_encode(fragment, encoding: "UTF-8")
57+
module Scrubber # :nodoc:
58+
module Full # :nodoc:
59+
def scrub(fragment, options = {})
60+
fragment.scrub!(TextOnlyScrubber.new)
61+
end
62+
end
63+
64+
module Link # :nodoc:
65+
def initialize
66+
super
67+
@link_scrubber = TargetScrubber.new
68+
@link_scrubber.tags = %w(a)
69+
@link_scrubber.attributes = %w(href)
70+
end
71+
72+
def scrub(fragment, options = {})
73+
fragment.scrub!(@link_scrubber)
74+
end
75+
end
76+
77+
module SafeList # :nodoc:
78+
DEFAULT_ALLOWED_TAGS = Set.new([
79+
"a",
80+
"abbr",
81+
"acronym",
82+
"address",
83+
"b",
84+
"big",
85+
"blockquote",
86+
"br",
87+
"cite",
88+
"code",
89+
"dd",
90+
"del",
91+
"dfn",
92+
"div",
93+
"dl",
94+
"dt",
95+
"em",
96+
"h1",
97+
"h2",
98+
"h3",
99+
"h4",
100+
"h5",
101+
"h6",
102+
"hr",
103+
"i",
104+
"img",
105+
"ins",
106+
"kbd",
107+
"li",
108+
"ol",
109+
"p",
110+
"pre",
111+
"samp",
112+
"small",
113+
"span",
114+
"strong",
115+
"sub",
116+
"sup",
117+
"time",
118+
"tt",
119+
"ul",
120+
"var",
121+
]).freeze
122+
DEFAULT_ALLOWED_ATTRIBUTES = Set.new([
123+
"abbr",
124+
"alt",
125+
"cite",
126+
"class",
127+
"datetime",
128+
"height",
129+
"href",
130+
"lang",
131+
"name",
132+
"src",
133+
"title",
134+
"width",
135+
"xml:lang",
136+
]).freeze
137+
138+
def self.included(klass)
139+
class << klass
140+
attr_accessor :allowed_tags
141+
attr_accessor :allowed_attributes
142+
end
143+
144+
klass.allowed_tags = DEFAULT_ALLOWED_TAGS.dup
145+
klass.allowed_attributes = DEFAULT_ALLOWED_ATTRIBUTES.dup
146+
end
147+
148+
def initialize(prune: false)
149+
@permit_scrubber = PermitScrubber.new(prune: prune)
150+
end
151+
152+
def scrub(fragment, options = {})
153+
if scrubber = options[:scrubber]
154+
# No duck typing, Loofah ensures subclass of Loofah::Scrubber
155+
fragment.scrub!(scrubber)
156+
elsif allowed_tags(options) || allowed_attributes(options)
157+
@permit_scrubber.tags = allowed_tags(options)
158+
@permit_scrubber.attributes = allowed_attributes(options)
159+
fragment.scrub!(@permit_scrubber)
160+
else
161+
fragment.scrub!(:strip)
162+
end
163+
end
164+
165+
def sanitize_css(style_string)
166+
Loofah::HTML5::Scrub.scrub_css(style_string)
167+
end
168+
169+
private
170+
def allowed_tags(options)
171+
options[:tags] || self.class.allowed_tags
172+
end
173+
174+
def allowed_attributes(options)
175+
options[:attributes] || self.class.allowed_attributes
176+
end
177+
end
56178
end
57179

58-
def sanitize(html, options = {})
59-
return unless html
60-
return html if html.empty?
180+
module Serializer # :nodoc:
181+
module UTF8Encode # :nodoc:
182+
def serialize(fragment)
183+
properly_encode(fragment, encoding: "UTF-8")
184+
end
185+
end
61186

62-
serialize(scrub(parse_fragment(html)))
187+
module SimpleString # :nodoc:
188+
def serialize(fragment)
189+
fragment.to_s
190+
end
191+
end
63192
end
64193
end
65194

195+
# === Rails::Html::FullSanitizer
196+
# Removes all tags but strips out scripts, forms and comments.
197+
#
198+
# full_sanitizer = Rails::Html::FullSanitizer.new
199+
# full_sanitizer.sanitize("<b>Bold</b> no more! <a href='more.html'>See more here</a>...")
200+
# # => Bold no more! See more here...
201+
class FullSanitizer < Sanitizer
202+
include Concern::ComposedSanitize
203+
include Concern::Parser::Html4
204+
include Concern::Scrubber::Full
205+
include Concern::Serializer::UTF8Encode
206+
end
207+
66208
# === Rails::Html::LinkSanitizer
67209
# Removes +a+ tags and +href+ attributes leaving only the link text.
68210
#
@@ -71,30 +213,10 @@ def sanitize(html, options = {})
71213
#
72214
# => 'Only the link text will be kept.'
73215
class LinkSanitizer < Sanitizer
74-
def initialize
75-
@link_scrubber = TargetScrubber.new
76-
@link_scrubber.tags = %w(a)
77-
@link_scrubber.attributes = %w(href)
78-
end
79-
80-
def parse_fragment(html)
81-
Loofah.fragment(html)
82-
end
83-
84-
def scrub(fragment)
85-
fragment.scrub!(@link_scrubber)
86-
end
87-
88-
def serialize(fragment)
89-
fragment.to_s
90-
end
91-
92-
def sanitize(html, options = {})
93-
return unless html
94-
return html if html.empty?
95-
96-
serialize(scrub(parse_fragment(html)))
97-
end
216+
include Concern::ComposedSanitize
217+
include Concern::Parser::Html4
218+
include Concern::Scrubber::Link
219+
include Concern::Serializer::SimpleString
98220
end
99221

100222
# === Rails::Html::SafeListSanitizer
@@ -140,114 +262,10 @@ def sanitize(html, options = {})
140262
# Safe list via a custom scrubber
141263
# safe_list_sanitizer.sanitize(@article.body, scrubber: ArticleScrubber.new)
142264
class SafeListSanitizer < Sanitizer
143-
class << self
144-
attr_accessor :allowed_tags
145-
attr_accessor :allowed_attributes
146-
end
147-
self.allowed_tags = Set.new([
148-
"a",
149-
"abbr",
150-
"acronym",
151-
"address",
152-
"b",
153-
"big",
154-
"blockquote",
155-
"br",
156-
"cite",
157-
"code",
158-
"dd",
159-
"del",
160-
"dfn",
161-
"div",
162-
"dl",
163-
"dt",
164-
"em",
165-
"h1",
166-
"h2",
167-
"h3",
168-
"h4",
169-
"h5",
170-
"h6",
171-
"hr",
172-
"i",
173-
"img",
174-
"ins",
175-
"kbd",
176-
"li",
177-
"ol",
178-
"p",
179-
"pre",
180-
"samp",
181-
"small",
182-
"span",
183-
"strong",
184-
"sub",
185-
"sup",
186-
"time",
187-
"tt",
188-
"ul",
189-
"var",
190-
])
191-
self.allowed_attributes = Set.new([
192-
"abbr",
193-
"alt",
194-
"cite",
195-
"class",
196-
"datetime",
197-
"height",
198-
"href",
199-
"lang",
200-
"name",
201-
"src",
202-
"title",
203-
"width",
204-
"xml:lang",
205-
])
206-
207-
def initialize(prune: false)
208-
@permit_scrubber = PermitScrubber.new(prune: prune)
209-
end
210-
211-
def parse_fragment(html)
212-
Loofah.fragment(html)
213-
end
214-
215-
def scrub(fragment, options = {})
216-
if scrubber = options[:scrubber]
217-
# No duck typing, Loofah ensures subclass of Loofah::Scrubber
218-
fragment.scrub!(scrubber)
219-
elsif allowed_tags(options) || allowed_attributes(options)
220-
@permit_scrubber.tags = allowed_tags(options)
221-
@permit_scrubber.attributes = allowed_attributes(options)
222-
fragment.scrub!(@permit_scrubber)
223-
else
224-
fragment.scrub!(:strip)
225-
end
226-
end
227-
228-
def serialize(fragment)
229-
properly_encode(fragment, encoding: "UTF-8")
230-
end
231-
232-
def sanitize(html, options = {})
233-
return unless html
234-
return html if html.empty?
235-
236-
serialize(scrub(parse_fragment(html), options))
237-
end
238-
239-
def sanitize_css(style_string)
240-
Loofah::HTML5::Scrub.scrub_css(style_string)
241-
end
242-
243-
private
244-
def allowed_tags(options)
245-
options[:tags] || self.class.allowed_tags
246-
end
247-
248-
def allowed_attributes(options)
249-
options[:attributes] || self.class.allowed_attributes
250-
end
265+
include Concern::ComposedSanitize
266+
include Concern::Parser::Html4
267+
include Concern::Scrubber::SafeList
268+
include Concern::Serializer::UTF8Encode
251269
end
252270

253271
WhiteListSanitizer = SafeListSanitizer # :nodoc:

test/sanitizer_test.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
class SanitizersTest < Minitest::Test
2424
def test_sanitizer_sanitize_raises_not_implemented_error
2525
assert_raises NotImplementedError do
26-
Rails::Html::Sanitizer.new.sanitize("")
26+
Rails::Html::Sanitizer.new.sanitize("asdf")
2727
end
2828
end
2929

0 commit comments

Comments
 (0)