@@ -5,15 +5,15 @@ module HTML
5
5
class Sanitizer
6
6
class << self
7
7
# Returns the sanitizer class used for stripping all markup:
# Rails::HTML4::FullSanitizer.
def full_sanitizer
  # Written without spaces around `::` on purpose: `Rails ::HTML4` would be
  # parsed as a method call taking a top-level constant as its argument.
  Rails::HTML4::FullSanitizer
end
10
10
11
11
# Returns the sanitizer class used for stripping links:
# Rails::HTML4::LinkSanitizer.
def link_sanitizer
  # Written without spaces around `::` on purpose: `Rails ::HTML4` would be
  # parsed as a method call taking a top-level constant as its argument.
  Rails::HTML4::LinkSanitizer
end
14
14
15
15
# Returns the sanitizer class used for safe-list based sanitization:
# Rails::HTML4::SafeListSanitizer.
def safe_list_sanitizer
  # Written without spaces around `::` on purpose: `Rails ::HTML4` would be
  # parsed as a method call taking a top-level constant as its argument.
  Rails::HTML4::SafeListSanitizer
end
18
18
19
19
def white_list_sanitizer # :nodoc:
@@ -36,8 +36,8 @@ def properly_encode(fragment, options)
36
36
end
37
37
end
38
38
39
- module Concern # :nodoc:
40
- module ComposedSanitize # :nodoc:
39
+ module Concern
40
+ module ComposedSanitize
41
41
def sanitize ( html , options = { } )
42
42
return unless html
43
43
return html if html . empty?
@@ -46,22 +46,22 @@ def sanitize(html, options = {})
46
46
end
47
47
end
48
48
49
# Mixins that supply the +parse_fragment+ step of a composed sanitizer.
module Parser
  # Parses input with Loofah's HTML4 fragment parser.
  module HTML4
    # Hands +html+ to <tt>Loofah.html4_fragment</tt> and returns its result
    # unchanged.
    def parse_fragment(html)
      Loofah.html4_fragment(html)
    end
  end
end
56
56
57
- module Scrubber # :nodoc:
58
- module Full # :nodoc:
57
+ module Scrubber
58
# Scrub step that runs a TextOnlyScrubber over the fragment.
module Full
  # Scrubs +fragment+ in place with a fresh TextOnlyScrubber and returns
  # whatever <tt>fragment.scrub!</tt> returns.
  #
  # +options+ is accepted so the signature matches the other scrub steps,
  # but it is not consulted here.
  def scrub(fragment, options = {})
    fragment.scrub!(TextOnlyScrubber.new)
  end
end
63
63
64
- module Link # :nodoc:
64
+ module Link
65
65
def initialize
66
66
super
67
67
@link_scrubber = TargetScrubber . new
@@ -74,7 +74,8 @@ def scrub(fragment, options = {})
74
74
end
75
75
end
76
76
77
- module SafeList # :nodoc:
77
+ module SafeList
78
+ # The default safe list for tags
78
79
DEFAULT_ALLOWED_TAGS = Set . new ( [
79
80
"a" ,
80
81
"abbr" ,
@@ -119,6 +120,8 @@ module SafeList # :nodoc:
119
120
"ul" ,
120
121
"var" ,
121
122
] ) . freeze
123
+
124
+ # The default safe list for attributes
122
125
DEFAULT_ALLOWED_ATTRIBUTES = Set . new ( [
123
126
"abbr" ,
124
127
"alt" ,
@@ -177,97 +180,116 @@ def allowed_attributes(options)
177
180
end
178
181
end
179
182
180
# Mixins that supply the +serialize+ step of a composed sanitizer.
module Serializer
  # Serializes by delegating to +properly_encode+ with a UTF-8 encoding.
  module UTF8Encode
    # Returns the result of calling +properly_encode+ with +fragment+ and
    # <tt>encoding: "UTF-8"</tt>. +properly_encode+ is not defined in this
    # module; the including class is expected to provide it.
    def serialize(fragment)
      properly_encode(fragment, encoding: "UTF-8")
    end
  end

  # Serializes by plain string conversion.
  module SimpleString
    # Returns <tt>fragment.to_s</tt>.
    def serialize(fragment)
      fragment.to_s
    end
  end
end
193
196
end
197
+ end
194
198
195
- # === Rails::HTML::FullSanitizer
196
- # Removes all tags but strips out scripts, forms and comments.
199
module HTML4
  # == Rails::HTML4::FullSanitizer
  #
  # Removes all tags from HTML4 but strips out scripts, forms and comments.
  #
  #   full_sanitizer = Rails::HTML4::FullSanitizer.new
  #   full_sanitizer.sanitize("<b>Bold</b> no more! <a href='more.html'>See more here</a>...")
  #   # => "Bold no more! See more here..."
  #
  class FullSanitizer < Rails::HTML::Sanitizer
    include HTML::Concern::ComposedSanitize
    include HTML::Concern::Parser::HTML4
    include HTML::Concern::Scrubber::Full
    include HTML::Concern::Serializer::UTF8Encode
  end

  # == Rails::HTML4::LinkSanitizer
  #
  # Removes +a+ tags and +href+ attributes from HTML4 leaving only the link text.
  #
  #   link_sanitizer = Rails::HTML4::LinkSanitizer.new
  #   link_sanitizer.sanitize('<a href="example.com">Only the link text will be kept.</a>')
  #   # => "Only the link text will be kept."
  #
  class LinkSanitizer < Rails::HTML::Sanitizer
    include HTML::Concern::ComposedSanitize
    include HTML::Concern::Parser::HTML4
    include HTML::Concern::Scrubber::Link
    include HTML::Concern::Serializer::SimpleString
  end

  # == Rails::HTML4::SafeListSanitizer
  #
  # Sanitizes HTML4 and CSS from an extensive safe list.
  #
  # === Whitespace
  #
  # We can't make any guarantees about whitespace being kept or stripped. Loofah uses Nokogiri,
  # which wraps either a C or Java parser for the respective Ruby implementation. Those two
  # parsers determine how whitespace is ultimately handled.
  #
  # When the stripped markup is rendered, the user's browser won't take whitespace into account
  # anyway. It might be better to suggest your users wrap their whitespace-sensitive content in
  # pre tags or that you do so automatically.
  #
  # === Options
  #
  # Sanitizes both HTML and CSS via the safe lists found in
  # Rails::HTML::Concern::Scrubber::SafeList
  #
  # SafeListSanitizer also accepts options to configure the safe list used when sanitizing HTML.
  # There's a class level option:
  #
  #   Rails::HTML4::SafeListSanitizer.allowed_tags = %w(table tr td)
  #   Rails::HTML4::SafeListSanitizer.allowed_attributes = %w(id class style)
  #
  # Tags and attributes can also be passed to +sanitize+. Passed options take precedence over the
  # class level options.
  #
  # === Examples
  #
  #   safe_list_sanitizer = Rails::HTML4::SafeListSanitizer.new
  #
  #   # default: sanitize via an extensive safe list of allowed elements
  #   safe_list_sanitizer.sanitize(@article.body)
  #
  #   # sanitize via the supplied tags and attributes
  #   safe_list_sanitizer.sanitize(
  #     @article.body,
  #     tags: %w(table tr td),
  #     attributes: %w(id class style),
  #   )
  #
  #   # sanitize via a custom Loofah scrubber
  #   safe_list_sanitizer.sanitize(@article.body, scrubber: ArticleScrubber.new)
  #
  #   # prune nodes from the tree instead of stripping tags and leaving inner content
  #   safe_list_sanitizer = Rails::HTML4::SafeListSanitizer.new(prune: true)
  #
  #   # the sanitizer can also sanitize CSS
  #   safe_list_sanitizer.sanitize_css('background-color: #000;')
  #
  class SafeListSanitizer < Rails::HTML::Sanitizer
    include HTML::Concern::ComposedSanitize
    include HTML::Concern::Parser::HTML4
    include HTML::Concern::Scrubber::SafeList
    include HTML::Concern::Serializer::UTF8Encode
  end
end
270
288
289
module HTML
  # The sanitizer classes are defined under HTML4; these constants expose the
  # same classes under the HTML namespace as well.

  FullSanitizer = HTML4::FullSanitizer # :nodoc:
  LinkSanitizer = HTML4::LinkSanitizer # :nodoc:
  SafeListSanitizer = HTML4::SafeListSanitizer # :nodoc:
  WhiteListSanitizer = SafeListSanitizer # :nodoc:
end
273
295
end
0 commit comments