wisest owl (wisest_owl) wrote in changelog,
wisest owl
wisest_owl
changelog

[livejournal] r17862: LJSUP-7507. need to cut the text by the ...

Committer: gprochaev
LJSUP-7507. need to cut the text by the rules

U   trunk/cgi-bin/LJ/Browse/Parser.pm
U   trunk/cgi-bin/LJ/Widget/Browse.pm
Modified: trunk/cgi-bin/LJ/Browse/Parser.pm
===================================================================
--- trunk/cgi-bin/LJ/Browse/Parser.pm	2010-12-06 07:45:08 UTC (rev 17861)
+++ trunk/cgi-bin/LJ/Browse/Parser.pm	2010-12-06 08:10:09 UTC (rev 17862)
@@ -23,9 +23,11 @@
     my $ret = '';
     my @open_tags = ();
     my $content_len = 0;
+    my $is_removed_video = 0;
     my $images_crop_cnt = $args{'crop_image'};
     my @images = ();
     my $remove_tags = $args{'remove_tags'};
+    my $is_text_trimmed = 0;
 
     while (my $token = $p->get_token) {
         my $type = $token->[0];
@@ -33,8 +35,9 @@
         my $attr = $token->[2];  # hashref
 
         if ($type eq "S") {
-            my $selfclose;
+            my $selfclose = 0;
 
+            ## resize and crop the first image from the post, if one exists
             if ($tag eq 'img') {
                 next unless $images_crop_cnt;
                 $images_crop_cnt--;
@@ -49,29 +52,50 @@
                 next;
             }
 
-            next if grep { $tag eq $_ } @$remove_tags;
+            if (grep { $tag eq $_ } @$remove_tags) {
+                ## add a space so the surrounding text does not stick together
+                $ret .= " ";
+                next;
+            }
 
-            # start tag
-            $ret .= "<$tag";
+            if ($tag =~ /^lj-poll/) {
+                ## no need to insert poll
+                $ret .= " ";
+            } elsif ($tag =~ /^lj-embed/) {
+                ## nothing to do. remove all embed content
+                $is_removed_video = 1;
+                $ret .= " ";
+            } elsif ($tag =~ /^lj-cut/) {
+                ## remove all text from lj-cut
+                $ret .= " ";
+            } elsif ($tag eq 'lj') {
+                foreach my $attrname (keys %$attr) {
+                    if ($attrname =~ /user|comm/) {
+                        $ret .= LJ::ljuser($attr->{$attrname});
+                    }
+                }
+                $selfclose = 1;
+            } else {
+                $ret .= "<$tag";
 
-            # assume tags are properly self-closed
-            $selfclose = 1 if lc $tag eq 'input' || lc $tag eq 'br' || lc $tag eq 'img';
+                # assume tags are properly self-closed
+                $selfclose = 1 if lc $tag eq 'input' || lc $tag eq 'br' || lc $tag eq 'img';
 
-            # preserve order of attributes. the original order is
-            # in element 4 of $token
-            foreach my $attrname (@{$token->[3]}) {
-                if ($attrname eq '/') {
-                    $selfclose = 1;
-                    next;
+                # preserve order of attributes. the original order is
+                # in element 4 of $token
+                foreach my $attrname (@{$token->[3]}) {
+                    if ($attrname eq '/') {
+                        next;
+                    }
+
+                    # FIXME: ultra ghetto.
+                    $attr->{$attrname} = LJ::no_utf8_flag($attr->{$attrname});
+                    $ret .= " $attrname=\"" . LJ::ehtml($attr->{$attrname}) . "\"";
                 }
 
-                # FIXME: ultra ghetto.
-                $attr->{$attrname} = LJ::no_utf8_flag($attr->{$attrname});
-                $ret .= " $attrname=\"" . LJ::ehtml($attr->{$attrname}) . "\"";
+                $ret .= $selfclose ? " />" : ">";
             }
 
-            $ret .= $selfclose ? " />" : ">";
-
             push @open_tags, $tag unless $selfclose;
 
         } elsif ($type eq 'T' || $type eq 'D') {
@@ -80,8 +104,9 @@
             if (length($content) + $content_len > $char_max) {
 
                 # truncate and stop parsing
-                $content = LJ::text_trim($content, undef, ($char_max - $content_len));
+                $content = LJ::trim_at_word($content, ($char_max - $content_len));
                 $ret .= $content;
+                $is_text_trimmed = 1;
                 last;
             }
 
@@ -107,8 +132,10 @@
     _after_parse (\$ret);
 
     return {
-        text    => $ret,
-        images  => \@images,
+        text             => $ret,
+        images           => \@images,
+        is_removed_video => $is_removed_video,
+        is_text_trimmed  => $is_text_trimmed,
     }
 }
 
@@ -117,6 +144,9 @@
 
     ## Remove multiple "br" tags
     $$text =~ s#(\s*<br\s*/?>\s*){2,}# #gi;
+
+    ## Remove all content of 'script' tag
+    $$text =~ s#<script.*?/script># #gis;
 }
 
 1;

Modified: trunk/cgi-bin/LJ/Widget/Browse.pm
===================================================================
--- trunk/cgi-bin/LJ/Widget/Browse.pm	2010-12-06 07:45:08 UTC (rev 17861)
+++ trunk/cgi-bin/LJ/Widget/Browse.pm	2010-12-06 08:10:09 UTC (rev 17862)
@@ -251,12 +251,12 @@
             my @tags = $entry->tags;
             my $subject = $entry->subject_text || '***';
             my $trimmed_subj = LJ::html_trim ($subject, 60);
-            my $event = $entry->event_html;
+            my $event = $entry->event_raw;
 
             my $parsed = LJ::Browse::Parser->do_parse (
                 text        => $event,
-                remove_tags => [ 'b', 'p', 'div', 'span', 'strong' ],
-                max_len     => 1000,
+                remove_tags => [ 'b', 'p', 'div', 'span', 'strong', 'font' ],
+                max_len     => 800,
                 crop_image  => 1,
             );
             $event = $parsed->{'text'};
@@ -277,7 +277,7 @@
                 url_to_post     => $entry->url,
                 photo_for_post  => scalar @$images ? $images->[0] : '',
                 comments_count  => $entry->reply_count,
-                is_need_more    => $parsed->{'is_removed_video'} || bytes::length($entry->event_html) > 800 ? 1 : 0,
+                is_need_more    => $parsed->{'is_removed_video'} || $parsed->{'is_text_trimmed'},
             };
         }
     }

Tags: livejournal, pm, wisest-owl
Subscribe
  • Post a new comment

    Error

    Anonymous comments are disabled in this journal

    default userpic

    Your reply will be screened

    Your IP address will be recorded 

  • 0 comments