Первый фоллен на хелле (wildkin) wrote in changelog,
Первый фоллен на хелле
wildkin
changelog

[livejournal] r20962: LJSUP-10951: Return spellcheck to S1

Committer: dnikolaev
LJSUP-10951: Return spellcheck to S1

U   trunk/cgi-bin/LJ/SpellCheck.pm
Modified: trunk/cgi-bin/LJ/SpellCheck.pm
===================================================================
--- trunk/cgi-bin/LJ/SpellCheck.pm	2012-01-23 15:51:12 UTC (rev 20961)
+++ trunk/cgi-bin/LJ/SpellCheck.pm	2012-01-24 06:16:41 UTC (rev 20962)
@@ -17,7 +17,7 @@
 package LJ::SpellCheck;
 
 use strict;
-use IPC::Run qw/run timeout/;
+use IPC::Run;
 
 use vars qw($VERSION);
 $VERSION = '1.0';
@@ -26,28 +26,55 @@
 #    ispell -a -h  (default)
 #    /usr/local/bin/aspell pipe -H --sug-mode=fast --ignore-case
 
+my @DEFAULT_COMMAND = qw/aspell -a -H --ignore-case --sug-mode=fast --encoding=utf-8/;
+
+my $LANGUAGES = {
+    'ru'    => 'ru',
+    'en_lj' => 'en',    # lower case of 'en_LJ'
+    'en_gb' => 'en_GB',
+    'de'    => 'de',
+};
+
 sub new {
     my ($class, $args) = @_;
     my $self = {};
     bless $self, ref $class || $class;
 
-    $self->{'command'} = $args->{'spellcommand'} || [qw/ispell -a -h/];
-    $self->{'color'} = $args->{'color'} || "#FF0000";
+    $self->{command} = $args->{spellcommand} || [ @DEFAULT_COMMAND ];
+    $self->{color} = $args->{color} || "#FF0000";
     return $self;
 }
 
-# This function takes a block of text to spell-check and returns HTML 
-# to show suggesting correction, if any.  If the return from this 
-# function is empty, then there were no misspellings found.
+sub run_aspell {
+    my ($text_ref, $opts, $handler_misspelled, $handler_text) = @_;
 
-sub check_html {
-    my $self = shift;
-    my $journal = shift;
- 
-    my @in_lines    = split /[\r\n]+/, $$journal;
-    my @out_lines; 
-    my $color = $self->{'color'};
+    if (ref($handler_misspelled) ne 'CODE') {
+        die "Invalid handler_misspelled parameter - need coderef";
+    }
 
+    $handler_text = undef unless defined $handler_text && ref($handler_text) eq 'CODE';
+
+    $opts = {} unless defined($opts) && ref($opts) eq 'HASH';
+
+    my $command = $opts->{command};
+    if ($command) {
+        if (ref($command) ne 'ARRAY') {
+            die "Invalid parameter 'command' - need arrayref";
+        }
+    }
+    else {
+        $command = [ @DEFAULT_COMMAND ];
+        if (my $language = $opts->{language}) {
+            $language = $LANGUAGES->{lc($language)};
+            return (0, 'unsupported_language') unless $language;
+
+            push @$command, "--lang=$language";
+        }
+    }
+
+    my @in_lines = split qr/[\r\n]+/, $$text_ref;
+    my @out_lines;
+
     {
         my ($in, $out, $err);
         
@@ -55,55 +82,135 @@
         ## ^ = escape each line (i.e. each line is text, not control command for aspell)
         $in = "!\n" . join("\n", map { "^$_" } @in_lines);
 
-        run($self->{'command'}, \$in, \$out, \$err, timeout(10))
-            or die "Can't run spellchecker: $?";
+        warn join(' ', @$command), "\n";
+
+        IPC::Run::run($command, \$in, \$out, \$err, IPC::Run::timeout(10))
+            or die "Can't run spellchecker: $? ($err)";
+
         @out_lines = split /\n/, $out;
-        
+
         warn "Spellchecker warning: $err" 
             if $err;
-        
+
         my $signature = shift @out_lines;
         die "Invalid spellchecker reply: $signature"
             unless $signature && $signature =~ /^@\(#\)/;
     }
 
-    my ($output, $footnotes, $has_errors, %seen_mispelled_words);
-
     INPUT_LINE:
     foreach my $input_line (@in_lines) {
-        my $pos = 0;
+        my $text_pos = 0;
         ASPELL_LINE: 
         while (my $aspell_line = shift @out_lines) {
-            my ($word, $offset, $suggestions_list);
+            my ($word, $offset, $suggestions_str);
+
             if (!$aspell_line) {
                 next INPUT_LINE;
-            } elsif ($aspell_line =~ /^& (\S+) \d+ (\d+): (.*)$/) {
-                ($word, $offset, $suggestions_list) = ($1, $2, $3);
-            } elsif ($aspell_line =~ /^\# (\S+) (\d+)/) {
-                my ($word, $offset, $suggestions_list) = ($1, $2, undef);
-            } else {
+            }
+            elsif ($aspell_line =~ /^& (\S+) \d+ (\d+): (.*)$/) {
+                ($word, $offset, $suggestions_str) = ($1, $2, $3);
+            }
+            elsif ($aspell_line =~ /^\# (\S+) (\d+)/) {
+                ($word, $offset, $suggestions_str) = ($1, $2, undef);
+            }
+            else {
                 next ASPELL_LINE;
             }
 
-            $output .= LJ::ehtml(substr($input_line, $pos, $offset-$pos-1));
-            $output .= "<font color='$color'>".LJ::ehtml($word)."</font>";
+            $offset--; # due to escaping each line by char '^'
 
-            if ($suggestions_list && !$seen_mispelled_words{$word}++) {
-                $footnotes .= 
-                    "<tr valign=top><td align=right><font color='$color'>".LJ::ehtml($word).
-                    "</font></td><td>".LJ::ehtml($suggestions_list)."</td></tr>\n";
-            }
-            $pos = $offset + length($word) - 1;
-            $has_errors++;
+            $handler_text->(substr($input_line, $text_pos, $offset - $text_pos)) if $handler_text && $text_pos < $offset;
+            $handler_misspelled->($word, $suggestions_str);
+            $text_pos = $offset + length($word);
         }
-        $output .= LJ::ehtml(substr($input_line, $pos, length($input_line)-$pos)) . "<br>\n";
+
+        $handler_text->(substr($input_line, $text_pos, length($input_line) - $text_pos) . "\n") if $handler_text && $text_pos < length($input_line);
     }
-   
-    return ($has_errors) 
-            ? "$output<p><b>Suggestions:</b><table cellpadding=3 border=0>$footnotes</table>"
-            : "";
+  
+    return (1, 'ok');
 }
 
+sub check { # plain function (no $self); returns a hashref: status, language, and words on success
+    my ($text_ref, $opts) = @_;
+
+    $opts = {} unless defined $opts && ref($opts) eq 'HASH';
+    my $limit = 0 + ($opts->{limit} || 0); # max suggestions kept per word; 0 means no limit
+
+    my %words; # misspelled word => arrayref of suggestions
+    my $handler_misspelled = sub {
+        my ($word, $suggestions_str) = @_;
+
+        return unless $suggestions_str; # words without suggestions are not reported
+        return if exists $words{$word}; # only the first occurrence of a word is recorded
+
+        my @suggestions = split qr/,\s*/, $suggestions_str;
+        if ($limit && @suggestions > $limit) {
+            @suggestions = @suggestions[0 .. $limit - 1];
+        }
+        $words{$word} = [ @suggestions ];
+    };
+
+    my ($result, $status) = run_aspell($text_ref, $opts, $handler_misspelled, undef);
+
+    if ($result) {
+        return {
+            status   => 'ok',
+            words    => \%words,
+            language => $opts->{language},
+        };
+    }
+    else {
+        return {
+            status   => $status, # failure reason from run_aspell, e.g. 'unsupported_language'
+            language => $opts->{language},
+        };
+    }
+}
+
+# Spell-checks the text behind $text_ref and returns it as escaped HTML with
+# misspelled words highlighted, plus a suggestions table when aspell offers any.
+# Returns '' when no misspellings were found (or run_aspell reports failure).
+
+sub check_html {
+    my ($self, $text_ref) = @_;
+
+    my $color = $self->{'color'};
+
+    my ($output, $footnotes, %seen_mispelled_words);
+    my $has_errors = 0; # counts every misspelling, with or without suggestions
+
+    my $handler_misspelled = sub {
+        my ($word, $suggestions_str) = @_;
+        $has_errors++;
+        $output .= "<font color='$color'>" . LJ::ehtml($word) . "</font>";
+
+        if ($suggestions_str && !$seen_mispelled_words{$word}++) {
+            $footnotes .=
+                "<tr valign=top>" .
+                    "<td align=right>" .
+                        "<font color='$color'>" . LJ::ehtml($word) . "</font>" .
+                    "</td>" .
+                    "<td>" .
+                        LJ::ehtml($suggestions_str) .
+                    "</td>" .
+                "</tr>\n";
+        }
+    };
+
+    my $handler_text = sub {
+        my $text = LJ::ehtml(shift);
+        $text =~ s/[\r\n]+/<br>/g;
+        $output .= $text;
+    };
+
+    my ($result, $status) = run_aspell($text_ref, {language => 'ru'}, $handler_misspelled, $handler_text);
+
+    return '' unless $result && $has_errors; # empty return means "nothing to correct"
+
+    $output .= "<p><b>Suggestions:</b><table cellpadding=3 border=0>$footnotes</table>" if $footnotes;
+    return $output;
+}
+
 1;
 __END__
 

Tags: dnikolaev, livejournal, pm, wildkin
Subscribe

  • Post a new comment

    Error

    Anonymous comments are disabled in this journal

    default userpic

    Your reply will be screened

    Your IP address will be recorded 

  • 0 comments