Committer: amyshkin
LJSUP-11455: bad view variable mediaplaceholder.loading in Russian

U   trunk/cgi-bin/cleanhtml.pl
Modified: trunk/cgi-bin/cleanhtml.pl =================================================================== --- trunk/cgi-bin/cleanhtml.pl 2012-03-06 14:15:41 UTC (rev 21308) +++ trunk/cgi-bin/cleanhtml.pl 2012-03-06 14:22:50 UTC (rev 21309) @@ -604,6 +604,7 @@ or return 1; return $code->($htmlcleaner, $seq, $attr); }; + next if !$@ && !$clean_res; # this is so the rte converts its source to the standard ljuser html @@ -622,6 +623,7 @@ $attr->{'title'} = $ljuser_text; } + # stupid hack to remove the class='ljcut' from divs when we're # disabling them, so we account for the open div normally later. my $ljcut_div = $tag eq "div" && lc $attr->{class} eq "ljcut"; @@ -927,167 +929,171 @@ delete $hash->{'action'} if $deny; } - ATTR: - foreach my $attr (keys %$hash) - { - if ($remove_all_attribs || $remove_attribs{$attr}) { - delete $hash->{$attr}; - next; - } + if ( $remove_all_attribs ) { + $hash = {}; + } + else { + ATTR: + foreach my $attr (keys %$hash) { + if ( $remove_attribs{$attr} ) { + delete $hash->{$attr}; + next; + } - if ($attr =~ /^(?:on|dynsrc)/) { - delete $hash->{$attr}; - next; - } + if ($attr =~ /^(?:on|dynsrc)/) { + delete $hash->{$attr}; + next; + } - if ($attr eq "data") { - delete $hash->{$attr} unless $tag eq "object"; - next; - } + if ($attr eq "data") { + delete $hash->{$attr} unless $tag eq "object"; + next; + } - if ($attr eq 'width' || $attr eq 'height' ) { - if ($hash->{$attr} > 1024*2) { - $hash->{$attr} = 1024*2; + if ($attr eq 'width' || $attr eq 'height' ) { + if ($hash->{$attr} > 1024*2) { + $hash->{$attr} = 1024*2; + } } - } - ## warning: in commets left by anonymous users, <img src="something"> - ## is replaced by <a href="something"> (see 'extractimages' param) - ## If "something" is "data:<script ...", we'll get a vulnerability - if (($attr eq "href" || $attr eq 'src') && $hash->{$attr} =~ /^data/) { - delete $hash->{$attr}; - next; - } + ## warning: in commets left by anonymous users, <img src="something"> + ## is replaced by <a 
href="something"> (see 'extractimages' param) + ## If "something" is "data:<script ...", we'll get a vulnerability + if (($attr eq "href" || $attr eq 'src') && $hash->{$attr} =~ /^data/) { + delete $hash->{$attr}; + next; + } - if ($attr =~ /(?:^=)|[\x0b\x0d]/) { - # Cleaner attack: <p ='>' onmouseover="javascript:alert(document/**/.cookie)" > - # is returned by HTML::Parser as P_tag("='" => "='") Text( onmouseover...) - # which leads to reconstruction of valid HTML. Clever! - # detect this, and fail. - $total_fail->("$tag $attr"); - last TOKEN; - } + if ($attr =~ /(?:^=)|[\x0b\x0d]/) { + # Cleaner attack: <p ='>' onmouseover="javascript:alert(document/**/.cookie)" > + # is returned by HTML::Parser as P_tag("='" => "='") Text( onmouseover...) + # which leads to reconstruction of valid HTML. Clever! + # detect this, and fail. + $total_fail->("$tag $attr"); + last TOKEN; + } - # ignore attributes that do not fit this strict scheme - unless ($attr =~ /^[\w_:-]+$/) { - $total_fail->("$tag " . (%$hash > 1 ? "[...] " : "") . "$attr"); - last TOKEN; - } + # ignore attributes that do not fit this strict scheme + unless ($attr =~ /^[\w_:-]+$/) { + $total_fail->("$tag " . (%$hash > 1 ? "[...] " : "") . "$attr"); + last TOKEN; + } - $hash->{$attr} =~ s/[\t\n]//g; + $hash->{$attr} =~ s/[\t\n]//g; - # IE ignores the null character, so strip it out - $hash->{$attr} =~ s/\x0//g; + # IE ignores the null character, so strip it out + $hash->{$attr} =~ s/\x0//g; - # IE sucks: - my $nowhite = $hash->{$attr}; - $nowhite =~ s/[\s\x0b]+//g; - if ($nowhite =~ /(?:jscript|livescript|javascript|vbscript|about):/ix) { - delete $hash->{$attr}; - next; - } + # IE sucks: + my $nowhite = $hash->{$attr}; + $nowhite =~ s/[\s\x0b]+//g; + if ($nowhite =~ /(?:jscript|livescript|javascript|vbscript|about):/ix) { + delete $hash->{$attr}; + next; + } - if ($attr eq 'style') { - if ($opts->{'cleancss'}) { - # css2 spec, section 4.1.3 - # position === p\osition :( - # strip all slashes no matter what. 
- $hash->{style} =~ s/\\//g; + if ($attr eq 'style') { + if ($opts->{'cleancss'}) { + # css2 spec, section 4.1.3 + # position === p\osition :( + # strip all slashes no matter what. + $hash->{style} =~ s/\\//g; - # and catch the obvious ones ("[" is for things like document["coo"+"kie"] - foreach my $css ("/*", "[", qw(absolute fixed expression eval behavior cookie document window javascript -moz-binding)) { - if ($hash->{style} =~ /\Q$css\E/i) { - delete $hash->{style}; - next ATTR; + # and catch the obvious ones ("[" is for things like document["coo"+"kie"] + foreach my $css ("/*", "[", qw(absolute fixed expression eval behavior cookie document window javascript -moz-binding)) { + if ($hash->{style} =~ /\Q$css\E/i) { + delete $hash->{style}; + next ATTR; + } } - } - if ($opts->{'strongcleancss'}) { - if ($hash->{style} =~ /-moz-|absolute|relative|outline|z-index|(?<!-)(?:top|left|right|bottom)\s*:|filter|-webkit-/io) { - delete $hash->{style}; - next ATTR; + if ($opts->{'strongcleancss'}) { + if ($hash->{style} =~ /-moz-|absolute|relative|outline|z-index|(?<!-)(?:top|left|right|bottom)\s*:|filter|-webkit-/io) { + delete $hash->{style}; + next ATTR; + } } + + # remove specific CSS definitions + if ($remove_colors) { + $hash->{style} =~ s/(?:background-)?color:.*?(?:;|$)//gi; + } + if ($remove_sizes) { + $hash->{style} =~ s/font-size:.*?(?:;|$)//gi; + } + if ($remove_fonts) { + $hash->{style} =~ s/font-family:.*?(?:;|$)//gi; + } + if ($remove_positioning) { + $hash->{style} =~ s/margin.*?(?:;|$)//gi; + $hash->{style} =~ s/height\s*?:.*?(?:;|$)//gi; + # strip excessive padding + $hash->{style} =~ s/padding[^:]*?:\D*\d{3,}[^;]*(?:;|$)//gi; + } } - # remove specific CSS definitions - if ($remove_colors) { - $hash->{style} =~ s/(?:background-)?color:.*?(?:;|$)//gi; + if ($opts->{'clean_js_css'} && ! 
$LJ::DISABLED{'css_cleaner'}) { + # and then run it through a harder CSS cleaner that does a full parse + my $css = LJ::CSS::Cleaner->new; + $hash->{style} = $css->clean_property($hash->{style}); } - if ($remove_sizes) { - $hash->{style} =~ s/font-size:.*?(?:;|$)//gi; - } - if ($remove_fonts) { - $hash->{style} =~ s/font-family:.*?(?:;|$)//gi; - } - if ($remove_positioning) { - $hash->{style} =~ s/margin.*?(?:;|$)//gi; - $hash->{style} =~ s/height\s*?:.*?(?:;|$)//gi; - # strip excessive padding - $hash->{style} =~ s/padding[^:]*?:\D*\d{3,}[^;]*(?:;|$)//gi; - } } - if ($opts->{'clean_js_css'} && ! $LJ::DISABLED{'css_cleaner'}) { - # and then run it through a harder CSS cleaner that does a full parse - my $css = LJ::CSS::Cleaner->new; - $hash->{style} = $css->clean_property($hash->{style}); + if ( + lc $tag ne 'lj-embed' && + ( $attr eq 'class' || $attr eq 'id' ) && + $opts->{'strongcleancss'} ) + { + delete $hash->{$attr}; + next; } - } - if ( - lc $tag ne 'lj-embed' && - ( $attr eq 'class' || $attr eq 'id' ) && - $opts->{'strongcleancss'} ) - { - delete $hash->{$attr}; - next; - } - - # reserve ljs_* ids for divs, etc so users can't override them to replace content - if ($attr eq 'id' && $hash->{$attr} =~ /^ljs_/i) { - delete $hash->{$attr}; - next; - } - - if ($s1var) { - if ($attr =~ /%%/) { + # reserve ljs_* ids for divs, etc so users can't override them to replace content + if ($attr eq 'id' && $hash->{$attr} =~ /^ljs_/i) { delete $hash->{$attr}; - next ATTR; + next; } - my $props = $LJ::S1::PROPS->{$s1var}; + if ($s1var) { + if ($attr =~ /%%/) { + delete $hash->{$attr}; + next ATTR; + } - if ($hash->{$attr} =~ /^%%([\w:]+:)?(\S+?)%%$/ && $props->{$2} =~ /[aud]/) { - # don't change it. - } elsif ($hash->{$attr} =~ /^%%cons:\w+%%[^\%]*$/) { - # a site constant with something appended is also fine. 
- } elsif ($hash->{$attr} =~ /%%/) { - my $clean_var = sub { - my ($mods, $prop) = @_; - # HTML escape and kill line breaks - $mods = "attr:$mods" unless - $mods =~ /^(color|cons|siteroot|sitename|img):/ || - $props->{$prop} =~ /[ud]/; - return '%%' . $mods . $prop . '%%'; - }; + my $props = $LJ::S1::PROPS->{$s1var}; - $hash->{$attr} =~ s/[\n\r]//g; - $hash->{$attr} =~ s/%%([\w:]+:)?(\S+?)%%/$clean_var->(lc($1), $2)/eg; + if ($hash->{$attr} =~ /^%%([\w:]+:)?(\S+?)%%$/ && $props->{$2} =~ /[aud]/) { + # don't change it. + } elsif ($hash->{$attr} =~ /^%%cons:\w+%%[^\%]*$/) { + # a site constant with something appended is also fine. + } elsif ($hash->{$attr} =~ /%%/) { + my $clean_var = sub { + my ($mods, $prop) = @_; + # HTML escape and kill line breaks + $mods = "attr:$mods" unless + $mods =~ /^(color|cons|siteroot|sitename|img):/ || + $props->{$prop} =~ /[ud]/; + return '%%' . $mods . $prop . '%%'; + }; - if ($attr =~ /^(href|src|lowsrc|style)$/) { - $hash->{$attr} = "\%\%[attr[$hash->{$attr}]]\%\%"; + $hash->{$attr} =~ s/[\n\r]//g; + $hash->{$attr} =~ s/%%([\w:]+:)?(\S+?)%%/$clean_var->(lc($1), $2)/eg; + + if ($attr =~ /^(href|src|lowsrc|style)$/) { + $hash->{$attr} = "\%\%[attr[$hash->{$attr}]]\%\%"; + } } + } + # remove specific attributes + if (($remove_colors && ($attr eq "color" || $attr eq "bgcolor" || $attr eq "fgcolor" || $attr eq "text")) || + ($remove_sizes && $attr eq "size") || + ($remove_fonts && $attr eq "face")) { + delete $hash->{$attr}; + next ATTR; + } } - - # remove specific attributes - if (($remove_colors && ($attr eq "color" || $attr eq "bgcolor" || $attr eq "fgcolor" || $attr eq "text")) || - ($remove_sizes && $attr eq "size") || - ($remove_fonts && $attr eq "face")) { - delete $hash->{$attr}; - next ATTR; - } } ## attribute lj-sys-message-close is used in SiteMessage's only @@ -1177,8 +1183,8 @@ '<span class="b-mediaplaceholder-outer">' . '<span class="b-mediaplaceholder-inner">' . '<i class="b-mediaplaceholder-pic"></i>' . 
- '<span class="b-mediaplaceholder-label b-mediaplaceholder-view">' . LJ::Lang::ml("mediaplaceholder.viewimage") . '</span>'. - '<span class="b-mediaplaceholder-label b-mediaplaceholder-loading">' . LJ::Lang::ml("mediaplaceholder.loading") . '</span>'. + '<span class="b-mediaplaceholder-label b-mediaplaceholder-view">' . Encode::decode_utf8(LJ::Lang::ml("mediaplaceholder.viewimage")) . '</span>'. + '<span class="b-mediaplaceholder-label b-mediaplaceholder-loading">' . Encode::decode_utf8(LJ::Lang::ml("mediaplaceholder.loading")) . '</span>'. '</span>' . '</span>' . '</a>'; @@ -1186,7 +1192,7 @@ '<a href="' . $href_b_link .'" class="b-mediaplaceholder-external" title="' . LJ::Lang::ml("mediaplaceholder.link") . '">' . '<i class="b-mediaplaceholder-bg"></i>' . '<i class="b-mediaplaceholder-pic"></i>' . - '<span class="b-mediaplaceholder-inner">' . LJ::Lang::ml("mediaplaceholder.link") . '</span>' . + '<span class="b-mediaplaceholder-inner">' . Encode::decode_utf8(LJ::Lang::ml("mediaplaceholder.link")) . '</span>' . '</a>' : ''; $alt_output = 1; $opencount{"img"}++; @@ -1254,9 +1260,9 @@ } } - unless ($alt_output) - { + unless ($alt_output) { my $allow; + if ($mode eq "allow") { $allow = 1; if ($action{$tag} eq "deny") { $allow = 0; } @@ -1265,8 +1271,9 @@ if ($action{$tag} eq "allow") { $allow = 1; } } - if ($allow && ! $remove{$tag}) - { + my $newtag = ''; + + if ($allow && ! $remove{$tag}) { if ($opts->{'tablecheck'}) { $allow = 0 if @@ -1281,13 +1288,13 @@ ($tag eq 'table' && @tablescope && ! grep { $tablescope[-1]->{$_} } qw(td th)); } - if ($allow) { $newdata .= "<$tag"; } - else { $newdata .= "<$tag"; } + if ($allow) { $newtag .= "<$tag"; } + else { $newtag .= "<$tag"; } # output attributes in original order, but only those # that are allowed (by still being in %$hash after cleaning) foreach (@$attrs) { - $newdata .= " $_=\"" . LJ::ehtml($hash->{$_}) . "\"" + $newtag .= " $_=\"" . LJ::ehtml($hash->{$_}) . 
"\"" if exists $hash->{$_}; } @@ -1295,12 +1302,12 @@ # actually close itself. Otherwise, a tag like <em /> can pass through as valid # even though some browsers just render it as an opening tag if ($slashclose && $tag =~ $slashclose_tags) { - $newdata .= " /"; + $newtag .= " /"; $opencount{$tag}--; $tablescope[-1]->{$tag}-- if $opts->{'tablecheck'} && @tablescope; } if ($allow) { - $newdata .= ">"; + $newtag .= ">"; $opencount{$tag}++; # maintain current table scope @@ -1317,7 +1324,31 @@ } } - else { $newdata .= ">"; } + else { $newtag .= ">"; } + + # change iframe with video to placeholder according to user settings + if ( lc $tag eq 'iframe' && $opts->{video_placeholders} ) { + my $width = $hash->{width}; + my $height = $hash->{height}; + $width =~ s/px$//; + $height =~ s/px$//; + $width = 960 if $width > 960; + $height = 750 if $height > 750; + + $width = $width =~ /^\d+$/ ? $width : 320; + $height = $height =~ /^\d+$/ ? $height : 240; + + $newdata .= LJ::placeholder_link( + placeholder_html => $newtag, + width => $width, + height => $height, + img => "$LJ::IMGPREFIX/videoplaceholder.png", + remove_video_sizes => $opts->{remove_video_sizes}, + ); + } + else { + $newdata .= $newtag; + } } } }