Committer: gprochaev
LJSV-1243. Allow users to embed media using iframes

U   trunk/cgi-bin/cleanhtml.pl
Modified: trunk/cgi-bin/cleanhtml.pl =================================================================== --- trunk/cgi-bin/cleanhtml.pl 2011-03-01 05:49:52 UTC (rev 18421) +++ trunk/cgi-bin/cleanhtml.pl 2011-03-01 07:57:35 UTC (rev 18422) @@ -32,7 +32,7 @@ { my $data = shift; LJ::CleanHTML::clean($data, { - 'eat' => [qw[layer iframe script object embed]], + 'eat' => [qw[layer script object embed]], 'mode' => 'allow', 'keepcomments' => 1, # Allows CSS to work }); @@ -116,7 +116,7 @@ my $data = shift; my $opts = shift; my $newdata; - + # remove the auth portion of any see_request.bml links $$data =~ s/(see_request\.bml\S+?)auth=\w+/$1/ig; @@ -143,13 +143,13 @@ my $remove_sizes = $opts->{'remove_sizes'} || 0; my $remove_fonts = $opts->{'remove_fonts'} || 0; my $blocked_links = (exists $opts->{'blocked_links'}) ? $opts->{'blocked_links'} : \@LJ::BLOCKED_LINKS; - my $blocked_link_substitute = + my $blocked_link_substitute = (exists $opts->{'blocked_link_substitute'}) ? $opts->{'blocked_link_substitute'} : ($LJ::BLOCKED_LINK_SUBSTITUTE) ? $LJ::BLOCKED_LINK_SUBSTITUTE : '#'; my $suspend_msg = $opts->{'suspend_msg'} || 0; my $unsuspend_supportid = $opts->{'unsuspend_supportid'} || 0; my $remove_all_attribs = $opts->{'remove_all_attribs'} || 0; - my %remove_attribs = ($opts->{'remove_attribs'}) ? + my %remove_attribs = ($opts->{'remove_attribs'}) ? (map {$_ => 1} @{ $opts->{'remove_attribs'} }) : (); my $remove_positioning = $opts->{'remove_positioning'} || 0; my $target = $opts->{'target'} || ''; @@ -163,7 +163,7 @@ # cuturl or entry_url tells about context and texts address, # Expand or close lj-cut tag should be switched directly by special flag # - expand_cut - $cut = '' if $opts->{expand_cut}; + $cut = '' if $opts->{expand_cut}; my @canonical_urls; # extracted links my %action = (); @@ -237,7 +237,11 @@ '<div style="width: 95%; overflow: auto">' . $edata . 
'</div></div>'; }; - my $htmlcleaner = HTMLCleaner->new(valid_stylesheet => \&LJ::valid_stylesheet_url); + ## We do not need to eat a tag 'iframe' if it enabled here. + my $htmlcleaner = HTMLCleaner->new( + valid_stylesheet => \&LJ::valid_stylesheet_url, + enable_iframe => (grep { $_ eq 'iframe' && $action{$_} == "allow" ? 1 : 0 } keys %action) ? 1 : 0 + ); my $eating_ljuser_span = 0; # bool, if we're eating an ljuser span my $ljuser_text_node = ""; # the last text node we saw while eating ljuser tags @@ -316,9 +320,9 @@ my $attr = $token->[2]; # hashref $good_until = length $newdata; - + if (LJ::is_enabled('remove_allowscriptaccess')) { - ## TODO: remove closing </param> tag, + ## TODO: remove closing </param> tag, ## don't strip 'allowscriptaccess' from YouTube and other trusted sites if ($tag eq 'param' && $attr->{name} eq 'allowscriptaccess') { next TOKEN; @@ -327,7 +331,7 @@ delete $attr->{allowscriptaccess}; } } - + if (@eatuntil) { push @capture, $token if $capturing_during_eat; if ($tag eq $eatuntil[-1]) { @@ -367,7 +371,7 @@ next TOKEN; } - + if ($tag eq 'lj-map') { $newdata .= LJ::Maps->expand_ljmap_tag($attr); next TOKEN; @@ -394,8 +398,8 @@ . qq(<input type="submit" value="$button" /> ) . qq[</form>]; } else { - $opencount{$tag} = { - button => $button, + $opencount{$tag} = { + button => $button, subject => $attr->{subject}, offset => length $newdata, }; @@ -490,16 +494,25 @@ } if ($tag eq 'iframe') { + ## Allow some iframes from trusted sources (if they are not eaten already) - ## TODO: add more trusted sites besides YouTube - ## YouTube (http://apiblog.youtube.com/2010/07/new-way-to-embed-youtube-videos.html) + ## YouTube (http://apiblog.youtube.com/2010/07/new-way-to-embed-youtube-videos.html), + ## Vimeo, VKontakte, Google Calendar, Google Docs my $src = $attr->{'src'}; - if ($src && $src =~ m!^https?://(?:[\w.-]*\.)?youtube\.com/embed/[-_a-zA-Z0-9]{11,}(?:\?.*)?$!) 
{ - ## allow + if ($src && + ( + $src =~ m!^https?://(?:[\w.-]*\.)?youtube\.com/embed/[-_a-zA-Z0-9]{11,}(?:\?.*)?$! + || $src =~ m!^http://player\.vimeo\.com/video/(?:\d+)! + || $src =~ m!^http://vkontakte\.ru/video_ext\.php\?oid=(?:\d+)&id=(?:\d+)&hash=[a-zA-Z0-9]+$! + || $src =~ m!^http://www\.google\.com/calendar/embed\?src=! + || $src =~ m!^https://docs\.google.com/document/pub\?id=! + ) + ) { + ## allow } else { ## eat this tag if (!$attr->{'/'}) { - ## if not autoclosed tag (<iframe />), + ## if not autoclosed tag (<iframe />), ## then skip everything till the closing tag $p->get_tag("/iframe"); } @@ -524,18 +537,18 @@ # this is so the rte converts its source to the standard ljuser html my $ljuser_div = $tag eq "div" && $attr->{class} eq "ljuser"; if ($ljuser_div) { - + my $href = $p->get_tag("a"); my $href_attr = $href->[1]->{"href"}; my $username = LJ::get_user_by_url ( $href_attr ); $attr->{'user'} = $username ? $username : ''; - + my $ljuser_text = $p->get_text("/b"); $p->get_tag("/div"); $ljuser_text =~ s/\[info\]//; $tag = "lj"; $attr->{'title'} = $ljuser_text; - + } # stupid hack to remove the class='ljcut' from divs when we're # disabling them, so we account for the open div normally later. 
@@ -715,7 +728,7 @@ delete $hash->{$attr} unless $tag eq "object"; next; } - + ## warning: in commets left by anonymous users, <img src="something"> ## is replaced by <a href="something"> (see 'extractimages' param) ## If "something" is "data:<script ...", we'll get a vulnerability @@ -766,7 +779,7 @@ next ATTR; } } - + if ($opts->{'strongcleancss'}) { if ($hash->{style} =~ /-moz-|absolute|relative|outline|z-index|(?<!-)(?:top|left|right|bottom)\s*:|filter|-webkit-/io) { delete $hash->{style}; @@ -803,7 +816,7 @@ delete $hash->{$attr}; next; } - + # reserve ljs_* ids for divs, etc so users can't override them to replace content if ($attr eq 'id' && $hash->{$attr} =~ /^ljs_/i) { delete $hash->{$attr}; @@ -871,7 +884,7 @@ } } } - + unless ($hash->{href} =~ s/^lj:(?:\/\/)?(.*)$/ExpandLJURL($1)/ei) { $hash->{href} = canonical_url($hash->{href}, 1); } @@ -892,27 +905,27 @@ $hash->{'height'} > $opts->{'maximgheight'})) { $img_bad = 1; } } if ($opts->{'extractimages'}) { $img_bad = 1; } - + ## TODO: a better check of $hash->{src} is needed, - ## known (fixed) vulnerability is src="data:..." - $hash->{src} = canonical_url($hash->{src}, 1); - - ## Ratings can be cheated by commenting a popular post with - ## <img src="http://my-journal.livejournal.com/12345.html"> + ## known (fixed) vulnerability is src="data:..." + $hash->{src} = canonical_url($hash->{src}, 1); + + ## Ratings can be cheated by commenting a popular post with + ## <img src="http://my-journal.livejournal.com/12345.html"> if ($hash->{src} =~ m!/\d+\.html$!) { - next TOKEN; + next TOKEN; } - + ## CDN: ## http://pics.livejournal.com/<certain-journal>/pic/000fbt9x* -> l-pics.livejournal.com ## TODO: make it work for communities too if ($hash->{'src'} =~ m!^http://(?:l-)?pics.livejournal.com/(\w+)/pic/(.*)$!i) { my ($journal, $rest) = ($1, $2); - my $host = (!$LJ::DISABLED{'pics_via_cdn'} && $LJ::USE_CDN_FOR_PICS{$journal}) + my $host = (!$LJ::DISABLED{'pics_via_cdn'} && $LJ::USE_CDN_FOR_PICS{$journal}) ? 
"l-pics.livejournal.com" : "pics.livejournal.com"; $hash->{'src'} = "http://$host/$journal/pic/$rest"; } - + if ($img_bad) { $newdata .= "<a class=\"ljimgplaceholder\" href=\"" . LJ::ehtml($hash->{'src'}) . "\">" . @@ -1035,7 +1048,6 @@ { my $tag = $token->[1]; next TOKEN if $tag =~ /[^\w\-:]/; - if (@eatuntil) { push @capture, $token if $capturing_during_eat; @@ -1078,14 +1090,14 @@ my $captured = substr $newdata => $opencount{$tag}->{offset}; if ($captured and my $entry = LJ::Entry->new_from_url($opts->{cuturl})){ - # !!! avoid calling any 'text' methods on $entry, + # !!! avoid calling any 'text' methods on $entry, # it can produce inifinite loop of cleanhtml calls. unless ($subject){ $subject = LJ::ehtml($entry->subject_raw || LJ::Lang::ml("repost.default_subject")); $subject = Encode::decode_utf8($subject) if $subject; } - $captured = LJ::Lang::ml("repost.wrapper", { + $captured = LJ::Lang::ml("repost.wrapper", { username => $entry->poster->username, url => $entry->url, subject => $subject, @@ -1109,7 +1121,7 @@ . qq(<input type="submit" value="$button" /> ) . 
qq[</form>]; } - + delete $opencount{$tag}; } elsif ( $tag eq 'lj-lang' ) { @@ -1544,7 +1556,7 @@ table tr td th tbody tfoot thead colgroup caption area map form textarea blink ); -my @comment_all = (@comment_close, "img", "br", "hr", "p", "col"); +my @comment_all = (@comment_close, "img", "br", "hr", "p", "col", "iframe"); my $userbio_eat = $event_eat; my $userbio_remove = $event_remove; @@ -1590,7 +1602,7 @@ sub pre_clean_event_for_entryform { my $ref = shift; - + ## fast path - no html tags return unless $$ref =~ /</; @@ -1607,7 +1619,7 @@ my $tag = $token->[1]; my $hash = $token->[2]; # attributes my $attrs = $token->[3]; # attribute names, in original order - + ## check the tag if ($tag eq 'script') { $p->get_tag('/script'); @@ -1644,7 +1656,7 @@ delete $hash->{$attr}; next; } - ## TODO: css & xslt js expressions + ## TODO: css & xslt js expressions } ## reconstruct the tag $newdata .= "<$tag"; @@ -1658,11 +1670,11 @@ } else { $newdata .= $token->[1]; } - } + } # extra-paranoid check 1 while $newdata =~ s/<script\b//ig; - + $$ref = Encode::encode_utf8($newdata); } @@ -1697,7 +1709,7 @@ 'linkify' => 1, 'wordlength' => 40, 'addbreaks' => $opts->{preformatted} ? 0 : 1, - 'eat' => [qw[head title style layer iframe applet object]], + 'eat' => [qw[head title style layer applet object]], 'mode' => 'deny', 'allow' => \@comment_all, 'autoclose' => \@comment_close, @@ -1723,7 +1735,7 @@ 'linkify' => 1, 'wordlength' => 40, 'addbreaks' => 0, - 'eat' => [qw[head title style layer iframe applet object]], + 'eat' => [qw[head title style layer applet object]], 'mode' => 'deny', 'allow' => \@comment_all, 'autoclose' => \@comment_close, @@ -1764,7 +1776,7 @@ LJ::parse_vars(\$s1, \%tmpl); foreach my $v (keys %tmpl) { clean(\$tmpl{$v}, { - 'eat' => [qw[layer iframe script object embed applet]], + 'eat' => [qw[layer script object embed applet]], 'mode' => 'allow', 'keepcomments' => 1, # allows CSS to work 'clean_js_css' => 1,