Skip to content

CSS URLs longer than 1024 characters are not allowed #187

Open
@sapio-dwelch

Description

@sapio-dwelch

When sanitizing large data URIs used as background images in CSS properties, the sanitizer applies a hard-coded limit of 1024 characters to each URL (see StylingPolicy.sanitizeAndAppendUrl). Any URL longer than 1024 characters is dropped, together with the CSS property that contains it.

/**
 * Minimal reproduction for the CSS URL length limit applied while sanitizing
 * {@code style} attributes.
 *
 * <p>Two nearly identical HTML fragments are run through the same policy. Each
 * holds a table cell whose {@code background-image} is a base64 {@code data:}
 * URI; the fragments differ only in the length of that URI.
 */
public class HtmlSanitizer {
    /**
     * HTML fragment whose {@code background-image} data URI is longer than
     * 1024 characters. According to the accompanying report, the sanitized
     * output of this fragment loses the {@code background-image} property.
     */
    public static final String TOO_LONG = "<table style=\"width:100%;\">\r\n" +
            "    <tr>\r\n" +
            "        <td style=\"background-image:url('data:image/svg+xml;base64,PHN2ZyB2aWV3" +
            "Qm94PSIwIDAgMjAwLjAgMjAwvoidIGZpbGwtb3BhY2l0eT0iMSIgeG1sbnM6eGxpbms9Imh0dHA6Ly93" +
            "d3cudzMub3JnLzE5OTkveGxpbmsiIGNvbG9yLXJlbmRlcmluZz0iYXV0byIgY29sb3ItaW50ZXJwb2xh" +
            "dGlvbj0iYXV0byIgdGV4dC1yZW5kZXJpbmc9ImF1dG8iIHN0cm9rZT0iYmxhY2siIHN0cm9rZS1saW5l" +
            "Y2FwPSJzcXVhcmUiIHdpZHRoPSIyMDAiIHN0cm9rZS1taXRlcmxpbWl0PSIxMCIgc2hhcGUtcmVuZGVy" +
            "aW5nPSJhdXRvIiBzdHJva2voidBhY2l0eT0iMSIgZmlsbD0iYmxhY2siIHN0cm9rZS1kYXNoYXJyYXk9" +
            "Im5vbmUiIGZvbnQtd2VpZ2h0PSJub3JtYWwiIHN0cm9rZS13aWR0aD0iMSIgaGVpZ2h0PSIyMDAiIHht" +
            "bG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyIgZm9udC1mYW1pbHk9IidEaWFsb2cnIiBmb250" +
            "LXN0eWxlPSJub3JtYWwiIHN0cm9rZS1saW5lam9pbj0ibWl0ZXIiIGZvbnQtc2l6ZT0iMTJweCIgc3Ry" +
            "b2tlLWRhc2hvZmZzZXQ9IjAiIGltYWdlLXJlbmRlcmluZz0iYXV0byINCj48IS0tR2VuZXJhdGVkIGJ5" +
            "IE1hcnZpbiB3aXRoIEJhdGlrIFNWRyBHZW5lcmF0b3ItLT48ZGVmcyBpZD0iZ2VuZXJpY0RlZnMiDQog" +
            "IC8+PGcNCiAgPjxkZWZzIGlkPSIxNDQ4MjQ5ODEwMjEtZGVmczEiDQogICAgPjxjbGlwUGF0aCBjbGlw" +
            "UGF0aFVuaXRzPSJ1c2VyU3BhY2VPblVzZSIgaWQ9voidNDgyNDk4MTA0OTgtY2xpcFBhdGgxIg0KICAg" +
            "ICAgPjxwYXRoIGQ9Ik0wIDAgTDIwMCAwIEwyMDAgMjAwIEwwIDIwMCBMMCAwIFoiDQogICAgICAvPjwv" +
            "Y2xpcFBhdGgNCiAgICAgID48Y2xpcFBhdGggY2xpcFBhdGhVbml0cz0idXNlclNwYWNlT25Vc2UiIGlk" +
            "PSIxNDQ4MjQ5OD=='); background-size: contain; background-repeat: no-repeat; back" +
            "ground-position:center; height:100px; margin:auto;\"/>\r\n" +
            "    </tr>\r\n" +
            "</table>";

    /**
     * Same markup as {@link #TOO_LONG} but with a shorter data URI that stays
     * under 1024 characters. According to the accompanying report, the
     * sanitized output of this fragment keeps the {@code background-image}
     * property.
     */
    public static final String NOT_TOO_LONG = "<table style=\"width:100%;\">\r\n" +
            "    <tr>\r\n" +
            "        <td style=\"background-image:url('data:image/svg+xml;base64,PHN2ZyB2aWV3" +
            "Qm94PSIwIDAgMjAwLjAgMjAwvoidIGZpbGwtb3BhY2l0eT0iMSIgeG1sbnM6eGxpbms9Imh0dHA6Ly93" +
            "d3cudzMub3JnLzE5OTkveGxpbmsiIGNvbG9yLXJlbmRlcmluZz0iYXV0byIgY29sb3ItaW50ZXJwb2xh" +
            "dGlvbj0iYXV0byIgdGV4dC1yZW5kZXJpbmc9ImF1dG8ivoidcm9rZT0iYmxhY2siIHN0cm9rZS1saW5l" +
            "Y2FwPSJzcXVhcmUiIHdpZHRoPSIyMDAiIHN0cm9rZS1taXRlcmxpbWl0PSIxMCIgc2hhcGUtcmVuZGVy" +
            "IE1hcnZpbiB3aXRoIEJhdGlrIFNWRyBHZW5lcmF0b3ItLT48ZGVmcyBpZD0iZ2VuZXJpY0RlZnMiDQog" +
            "IC8+PGcNCiAgPjxkZWZzIGlkPSIxNDQ4MjQ5ODEwMjEtZGVmczEiDQogICAgPjxjbGlwUGF0aCBjbGlw" +
            "UGF0aFVuaXRzPSJ1c2VyU3BhY2VPblVzZSIgaWQ9IjE0NDgyNDk4MTA0OTgtY2xpcFBhdGgxIg0KICAg" +
            "ICAgPjxwYXRoIGQ9Ik0wIDAgTDIwMCAwIEwyMDAgMjAwIEwwIDIwMCBMMCAwIFoiDQogICAgICAvPjwv" +
            "Y2xpcFBhdGgNCiAgvoidID48Y2xpcFBhdGggY2xpcFBhdGhVbml0cz0idXNlclNwYWNlT25Vc2UiIGlk" +
            "PSIxNDQ4MjQ5OD=='); background-size: contain; background-repeat: no-repeat; back" +
            "ground-position:center; height:100px; margin:auto;\"/>\r\n" +
            "    </tr>\r\n" +
            "</table>";

    /**
     * Sanitizer policy shared by both fragments: styling on all elements,
     * URLs inside styles, the {@code data} URL protocol, and the table
     * elements used by the fixtures.
     */
    public static final PolicyFactory HTML_POLICY = new HtmlPolicyBuilder()
            // allow all elements to be styled
            .allowStyling()
            // allow urls in bg images.
            .allowUrlsInStyles(AttributePolicy.IDENTITY_ATTRIBUTE_POLICY)
            // and also allow "data" URLs
            .allowUrlProtocols("data")
            // allow some more "global" elements
            .allowElements("table", "tbody", "th", "tr", "td").toFactory();

    /**
     * Sanitizes both fragments with {@link #HTML_POLICY} and prints each
     * result to standard output so the two can be compared.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        System.out.println(HTML_POLICY.sanitize(TOO_LONG));
        System.out.println(HTML_POLICY.sanitize(NOT_TOO_LONG));
    }
}

The output for TOO_LONG will not include the background-image property, but the output for NOT_TOO_LONG will. Is there a reason for the limit?

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions