Open
Description
When trying to sanitize large data URIs used as background images in CSS properties, there is a hard-coded URL limit of 1024 characters (this is in StylingPolicy.sanitizeAndAppendUrl). Any value larger than 1024 characters is removed.
/**
 * Minimal reproduction harness: runs two sample HTML fragments through a
 * sanitizer policy that permits styling, URLs inside styles and the
 * {@code data} URL protocol, and prints both sanitized results.
 *
 * <p>The two fragments differ only in the length of the base64 {@code data:}
 * URI used in the {@code background-image} property of the table cell.
 */
public class HtmlSanitizer {

  /**
   * Table markup whose cell style carries the longer of the two
   * {@code data:} URIs. Reported to lose its {@code background-image}
   * property when sanitized (the report attributes this to a 1024-character
   * URL limit in the styling policy).
   */
  public static final String TOO_LONG = "<table style=\"width:100%;\">\r\n" +
      " <tr>\r\n" +
      " <td style=\"background-image:url('data:image/svg+xml;base64,PHN2ZyB2aWV3" +
      "Qm94PSIwIDAgMjAwLjAgMjAwvoidIGZpbGwtb3BhY2l0eT0iMSIgeG1sbnM6eGxpbms9Imh0dHA6Ly93" +
      "d3cudzMub3JnLzE5OTkveGxpbmsiIGNvbG9yLXJlbmRlcmluZz0iYXV0byIgY29sb3ItaW50ZXJwb2xh" +
      "dGlvbj0iYXV0byIgdGV4dC1yZW5kZXJpbmc9ImF1dG8iIHN0cm9rZT0iYmxhY2siIHN0cm9rZS1saW5l" +
      "Y2FwPSJzcXVhcmUiIHdpZHRoPSIyMDAiIHN0cm9rZS1taXRlcmxpbWl0PSIxMCIgc2hhcGUtcmVuZGVy" +
      "aW5nPSJhdXRvIiBzdHJva2voidBhY2l0eT0iMSIgZmlsbD0iYmxhY2siIHN0cm9rZS1kYXNoYXJyYXk9" +
      "Im5vbmUiIGZvbnQtd2VpZ2h0PSJub3JtYWwiIHN0cm9rZS13aWR0aD0iMSIgaGVpZ2h0PSIyMDAiIHht" +
      "bG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyIgZm9udC1mYW1pbHk9IidEaWFsb2cnIiBmb250" +
      "LXN0eWxlPSJub3JtYWwiIHN0cm9rZS1saW5lam9pbj0ibWl0ZXIiIGZvbnQtc2l6ZT0iMTJweCIgc3Ry" +
      "b2tlLWRhc2hvZmZzZXQ9IjAiIGltYWdlLXJlbmRlcmluZz0iYXV0byINCj48IS0tR2VuZXJhdGVkIGJ5" +
      "IE1hcnZpbiB3aXRoIEJhdGlrIFNWRyBHZW5lcmF0b3ItLT48ZGVmcyBpZD0iZ2VuZXJpY0RlZnMiDQog" +
      "IC8+PGcNCiAgPjxkZWZzIGlkPSIxNDQ4MjQ5ODEwMjEtZGVmczEiDQogICAgPjxjbGlwUGF0aCBjbGlw" +
      "UGF0aFVuaXRzPSJ1c2VyU3BhY2VPblVzZSIgaWQ9voidNDgyNDk4MTA0OTgtY2xpcFBhdGgxIg0KICAg" +
      "ICAgPjxwYXRoIGQ9Ik0wIDAgTDIwMCAwIEwyMDAgMjAwIEwwIDIwMCBMMCAwIFoiDQogICAgICAvPjwv" +
      "Y2xpcFBhdGgNCiAgICAgID48Y2xpcFBhdGggY2xpcFBhdGhVbml0cz0idXNlclNwYWNlT25Vc2UiIGlk" +
      "PSIxNDQ4MjQ5OD=='); background-size: contain; background-repeat: no-repeat; back" +
      "ground-position:center; height:100px; margin:auto;\"/>\r\n" +
      " </tr>\r\n" +
      "</table>";

  /**
   * Same table markup as {@link #TOO_LONG} but with a shorter {@code data:}
   * URI. Reported to keep its {@code background-image} property when
   * sanitized.
   */
  public static final String NOT_TOO_LONG = "<table style=\"width:100%;\">\r\n" +
      " <tr>\r\n" +
      " <td style=\"background-image:url('data:image/svg+xml;base64,PHN2ZyB2aWV3" +
      "Qm94PSIwIDAgMjAwLjAgMjAwvoidIGZpbGwtb3BhY2l0eT0iMSIgeG1sbnM6eGxpbms9Imh0dHA6Ly93" +
      "d3cudzMub3JnLzE5OTkveGxpbmsiIGNvbG9yLXJlbmRlcmluZz0iYXV0byIgY29sb3ItaW50ZXJwb2xh" +
      "dGlvbj0iYXV0byIgdGV4dC1yZW5kZXJpbmc9ImF1dG8ivoidcm9rZT0iYmxhY2siIHN0cm9rZS1saW5l" +
      "Y2FwPSJzcXVhcmUiIHdpZHRoPSIyMDAiIHN0cm9rZS1taXRlcmxpbWl0PSIxMCIgc2hhcGUtcmVuZGVy" +
      "IE1hcnZpbiB3aXRoIEJhdGlrIFNWRyBHZW5lcmF0b3ItLT48ZGVmcyBpZD0iZ2VuZXJpY0RlZnMiDQog" +
      "IC8+PGcNCiAgPjxkZWZzIGlkPSIxNDQ4MjQ5ODEwMjEtZGVmczEiDQogICAgPjxjbGlwUGF0aCBjbGlw" +
      "UGF0aFVuaXRzPSJ1c2VyU3BhY2VPblVzZSIgaWQ9IjE0NDgyNDk4MTA0OTgtY2xpcFBhdGgxIg0KICAg" +
      "ICAgPjxwYXRoIGQ9Ik0wIDAgTDIwMCAwIEwyMDAgMjAwIEwwIDIwMCBMMCAwIFoiDQogICAgICAvPjwv" +
      "Y2xpcFBhdGgNCiAgvoidID48Y2xpcFBhdGggY2xpcFBhdGhVbml0cz0idXNlclNwYWNlT25Vc2UiIGlk" +
      "PSIxNDQ4MjQ5OD=='); background-size: contain; background-repeat: no-repeat; back" +
      "ground-position:center; height:100px; margin:auto;\"/>\r\n" +
      " </tr>\r\n" +
      "</table>";

  /** Sanitizer policy applied to both sample fragments in {@link #main}. */
  public static final PolicyFactory HTML_POLICY = new HtmlPolicyBuilder()
      // allow all elements to be styled
      .allowStyling()
      // allow urls in bg images.
      .allowUrlsInStyles(AttributePolicy.IDENTITY_ATTRIBUTE_POLICY)
      // and also allow "data" URLs
      .allowUrlProtocols("data")
      // allow some more "global" elements
      .allowElements("table", "tbody", "th", "tr", "td").toFactory();

  /**
   * Sanitizes {@link #TOO_LONG} and then {@link #NOT_TOO_LONG} with
   * {@link #HTML_POLICY} and prints each result to standard output so the
   * two outputs can be compared.
   *
   * @param args command-line arguments (unused)
   */
  public static void main(String[] args) {
    System.out.println(HTML_POLICY.sanitize(TOO_LONG));
    System.out.println(HTML_POLICY.sanitize(NOT_TOO_LONG));
  }
}
The output for TOO_LONG will not include the background-image property, but the output for NOT_TOO_LONG will. Is there a reason for the limit?
Metadata
Metadata
Assignees
Labels
No labels