Committer: ailyin
LJSUP-6780 (Twitter Digest): checkpoint commit
U trunk/bin/upgrading/en_LJ.dat U trunk/bin/upgrading/update-db-local.pl A trunk/bin/worker/twitter-digest A trunk/cgi-bin/LJ/Client/Twitter/ A trunk/cgi-bin/LJ/Client/Twitter/Tweet.pm A trunk/cgi-bin/LJ/Client/Twitter/User.pm U trunk/cgi-bin/LJ/Client/Twitter.pm A trunk/cgi-bin/LJ/Hooks/TwitterDigest.pm U trunk/cgi-bin/LJ/Setting/TwitterConnect.pm A trunk/cgi-bin/LJ/TwitterDigest.pm U trunk/templates/Settings/TwitterConnect.tmpl
Modified: trunk/bin/upgrading/en_LJ.dat =================================================================== --- trunk/bin/upgrading/en_LJ.dat 2010-09-16 08:36:33 UTC (rev 9508) +++ trunk/bin/upgrading/en_LJ.dat 2010-09-16 13:06:04 UTC (rev 9509) @@ -5128,6 +5128,8 @@ setting.twitterconnect.option.comments_twitter=Publish my comments to Twitter by default +setting.twitterconnect.option.digest=Publish my tweets to my journal + setting.twitterconnect.option.entries_twitter=Publish my entries to Twitter by default setting.userapps.change=Change @@ -6138,6 +6140,26 @@ <A href="[[typepad_rules_url]]">Read the official rules here</a>. . +twitter_digest.spam=<em>This tweet contained a link that was a potential spam threat.</em> + +twitter_digest.subject=Tweets + +twitter_digest.tags=twitter + +twitter_digest.weekday.mon=Mon, + +twitter_digest.weekday.tue=Tue, + +twitter_digest.weekday.wed=Wed, + +twitter_digest.weekday.thu=Thu, + +twitter_digest.weekday.fri=Fri, + +twitter_digest.weekday.sat=Sat, + +twitter_digest.weekday.sun=Sun, + twitterconnect.untitled.comment=New comment at LiveJournal: twitterconnect.untitled.entry=New entry at LiveJournal: Modified: trunk/bin/upgrading/update-db-local.pl =================================================================== --- trunk/bin/upgrading/update-db-local.pl 2010-09-16 08:36:33 UTC (rev 9508) +++ trunk/bin/upgrading/update-db-local.pl 2010-09-16 13:06:04 UTC (rev 9509) @@ -1576,6 +1576,18 @@ ) TYPE=InnoDB EOC +# see LJ::TwitterDigest +register_tablecreate("twitter_digest_status", <<'EOC'); +CREATE TABLE twitter_digest_status ( + userid INT NOT NULL DEFAULT 0 PRIMARY KEY, + next_post_time INT NOT NULL DEFAULT 0, + locked_until INT NOT NULL DEFAULT 0, + disabled INT NOT NULL DEFAULT 0, + + INDEX(next_post_time) +) TYPE=InnoDB +EOC + # ************************************************************* register_alter(sub { Added: trunk/bin/worker/twitter-digest =================================================================== --- 
trunk/bin/worker/twitter-digest (rev 0) +++ trunk/bin/worker/twitter-digest 2010-09-16 13:06:04 UTC (rev 9509) @@ -0,0 +1,24 @@ +#!/usr/bin/perl -w +use strict; +use warnings; + +use lib "$ENV{'LJHOME'}/cgi-bin"; +require 'ljlib.pl'; + +LJ::NewWorker::TwitterDigest->start; + +package LJ::NewWorker::TwitterDigest; +use base qw(LJ::NewWorker::Manual); + +use LJ::Client::Twitter; + +sub work { + warn "hi\n"; + return 0; ## pretend we didn't work and sleep +} + +sub on_idle { + sleep 1800 unless $LJ::IS_DEV_SERVER; +} + +1; Property changes on: trunk/bin/worker/twitter-digest ___________________________________________________________________ Added: svn:executable + * Added: trunk/cgi-bin/LJ/Client/Twitter/Tweet.pm =================================================================== --- trunk/cgi-bin/LJ/Client/Twitter/Tweet.pm (rev 0) +++ trunk/cgi-bin/LJ/Client/Twitter/Tweet.pm 2010-09-16 13:06:04 UTC (rev 9509) @@ -0,0 +1,106 @@ +package LJ::Client::Twitter::Tweet; +use strict; +use warnings; + +use LJ::Client::Twitter::User; + +sub from_hash { + my ($class, $data) = @_; + + my $obj = { + 'time' => LJ::Client::Twitter->parse_time($data->{'created_at'}), + 'text' => $data->{'text'}, + 'user' => LJ::Client::Twitter::User->from_hash($data->{'user'}), + 'id' => $data->{'id'}, + 'retweeted_from' => undef, + }; + + if ($data->{'retweeted_status'}) { + $obj->{'retweeted_from'} = + $class->from_hash($data->{'retweeted_status'}); + } + + return bless $obj, $class; +} + +# getters +sub post_time { + my ($self) = @_; + return $self->{'time'}; +} + +sub text_raw { + my ($self) = @_; + return $self->{'text'}; +} + +sub user { + my ($self) = @_; + return $self->{'user'}; +} + +sub id { + my ($self) = @_; + return $self->{'id'}; +} + +sub original_tweet { + my ($self) = @_; + return $self->{'retweeted_from'} || $self; +} + +# some simple manipulations on the data +sub url { + my ($self) = @_; + my $original = $self->original_tweet; + + return $original->user->url . '/status/' . 
$original->id; +} + +# regexps are taken from +# http://github.com/mzsanford/twitter-text-rb/blob/master/lib/regex.rb +# the regexps were transformed by having ruby compile them for us +# and then dump to stdout +my $re_auto_link_usernames_or_lists = qr/(?-mix:([^a-zA-Z0-9_]|^)([@\357\274\240]+)([a-zA-Z0-9_]{1,20})(\/(?-mix:[a-zA-Z][a-zA-Z0-9_\-\200-\377]{0,24}))?)/o; + +my $re_auto_link_hashtags = qr/(?i-mx:(^|[^0-9A-Z&\/]+)(#|\357\274\203)([0-9A-Z_]*[A-Z_]+(?i-mx:[a-z0-9_\303\200\303\201\303\202\303\203\303\204\303\205\303\206\303\207\303\210\303\211\303\212\303\213\303\214\303\215\303\216\303\217\303\220\303\221\303\222\303\223\303\224\303\225\303\226\303\230\303\231\303\232\303\233\303\234\303\235\303\236\303\237\303\240\303\241\303\242\303\243\303\244\303\245\303\246\303\247\303\250\303\251\303\252\303\253\303\254\303\255\303\256\303\257\303\260\303\261\303\262\303\263\303\264\303\265\303\266\303\270\303\271\303\272\303\273\303\274\303\275\303\276\303\277])*))/o; + +my $re_valid_url = qr/(?ix-m:(((?-mix:[^\/"':!=]|^|\:))((https?:\/\/|www\.)((?i-mx:(?:[^[:punct:]\s][\.-](?=[^[:punct:]\s])|[^[:punct:]\s]){1,}\.[a-z]{2,}(?::[0-9]+)?))(\/(?ix-m:(?i-mx:\((?i-mx:[a-z0-9!\*';:=\+\$\/%#\[\]\-_,~])+\))|@(?i-mx:[a-z0-9!\*';:=\+\$\/%#\[\]\-_,~])+\/|[\.\,]?(?i-mx:[a-z0-9!\*';:=\+\$\/%#\[\]\-_,~]))*(?i-mx:[a-z0-9=#\/])?)?(\?(?i-mx:[a-z0-9!\*'\(\);:&=\+\$\/%#\[\]\-_\.,~])*(?i-mx:[a-z0-9_&=#]))?)))/o; + +# and substitutions here are taken from +# http://github.com/mzsanford/twitter-text-rb/blob/master/lib/autolink.rb +sub _format_username_callback { + my ($before1, $before2, $text, $list) = @_; + + if ($list) { + $text = $list = $text . 
$list; + $list = lc($list); + return "$before1$before2<a href=\"http://twitter.com/$list\">$text</a>"; + } else { + return "$before1$before2<a href=\"http://twitter.com/$text\">$text</a>"; + } +} + +sub _format_url_callback { + my ($before, $url, $protocol) = @_; + + my $full_url = $url; + if ($protocol =~ /www\./i) { + $full_url = 'http://' . $url; + } + + return "$before<a href=\"$full_url\">$url</a>"; +} + +sub text_formatted { + my ($self) = @_; + + my $text = $self->text_raw; + $text =~ s{$re_auto_link_hashtags}{$1<a href="http://twitter.com/search?q=%23$3" title="#$3">$2$3</a>}g; + $text =~ s{$re_auto_link_usernames_or_lists}{_format_username_callback($1, $2, $3, $4)}ge; + $text =~ s{$re_valid_url}{_format_url_callback($2,$3,$4)}ge; + + return $text; +} + +1; Added: trunk/cgi-bin/LJ/Client/Twitter/User.pm =================================================================== --- trunk/cgi-bin/LJ/Client/Twitter/User.pm (rev 0) +++ trunk/cgi-bin/LJ/Client/Twitter/User.pm 2010-09-16 13:06:04 UTC (rev 9509) @@ -0,0 +1,82 @@ +package LJ::Client::Twitter::User; +use strict; +use warnings; + +sub from_hash { + my ($class, $data) = @_; + + return bless { + 'id' => $data->{'id'}, + 'screen_name' => $data->{'screen_name'}, + 'homepage' => $data->{'url'}, + 'lang' => $data->{'lang'}, + 'location' => $data->{'location'}, + 'name' => $data->{'name'}, + 'bio' => $data->{'description'}, + 'protected' => $data->{'protected'}, + 'time_zone' => $data->{'time_zone'}, + }, $class; +} + +sub from_screen_name { + my ($class, $sname) = @_; + + return bless { + 'screen_name' => $sname, + }, $class; +} + +# getters +sub id { + my ($self) = @_; + return $self->{'id'}; +} + +sub screen_name { + my ($self) = @_; + return $self->{'screen_name'}; +} + +sub homepage { + my ($self) = @_; + return $self->{'homepage'}; +} + +sub lang { + my ($self) = @_; + return $self->{'lang'}; +} + +sub location { + my ($self) = @_; + return $self->{'location'}; +} + +sub name { + my ($self) = @_; + return 
$self->{'name'}; +} + +sub bio { + my ($self) = @_; + return $self->{'bio'}; +} + +sub protected { + my ($self) = @_; + return $self->{'protected'}; +} + +sub time_zone { + my ($self) = @_; + return $self->{'time_zone'}; +} + +# some simple manipulations on the data +sub url { + my ($self) = @_; + + return 'http://twitter.com/' . $self->screen_name; +} + +1; Modified: trunk/cgi-bin/LJ/Client/Twitter.pm =================================================================== --- trunk/cgi-bin/LJ/Client/Twitter.pm 2010-09-16 08:36:33 UTC (rev 9508) +++ trunk/cgi-bin/LJ/Client/Twitter.pm 2010-09-16 13:06:04 UTC (rev 9509) @@ -8,6 +8,9 @@ use Net::OAuth::ProtectedResourceRequest; use HTTP::Request::Common; +use LJ::Client::Twitter::Tweet; +use LJ::Client::Twitter::User; + =head1 NAME LJ::Client::Twitter - the module that handles communication between LJ @@ -325,4 +328,63 @@ ); } +# this parses strings like 'Sat Jul 03 21:24:02 +0000 2010'; +# I haven't found a decent parser for that very format on CPAN, +# so here's an ad hoc one. 
if you go ahead and try replacing it +# with a third-party module, these two are worth considering: +# +# http://github.com/yannk/loudtwitter/blob/master/dev-local-lib/DateTime/Format/Twitter.pm +# (the loudtwitter project) +# +# DateTime::Format::Strptime->new(pattern => '%a %b %d %T %z %Y'); +# (from Net::Twitter::API) +# +# - ailyin, Sep 15, 2010 +#TODO: POD +sub parse_time { + my ($class, $time) = @_; + + my ($wday, $mon, $day, $hr, $min, $sec, $tz, $year) = + split(/[: ]+/, $time); + + my $mon_number = { + 'Jan' => 1, + 'Feb' => 2, + 'Mar' => 3, + 'Apr' => 4, + 'May' => 5, + 'Jun' => 6, + 'Jul' => 7, + 'Aug' => 8, + 'Sep' => 9, + 'Oct' => 10, + 'Nov' => 11, + 'Dec' => 12, + }->{$mon}; + + my $dt = DateTime->new( 'year' => $year, + 'month' => $mon_number, + 'day' => $day, + 'hour' => $hr, + 'minute' => $min, + 'second' => $sec, + 'time_zone' => $tz, ); + + return $dt->epoch; +} + +sub user_last_tweets { + my ($class, $u) = @_; + + my $res = LJ::Client::Twitter->call( + 'api_method' => 'statuses/user_timeline', + 'user' => $u, + 'http_method' => 'GET', + 'params' => { 'count' => 200, + 'include_rts' => 1, }, + ); + + return [ map { LJ::Client::Twitter::Tweet->from_hash($_) } @$res ]; +} + 1; Added: trunk/cgi-bin/LJ/Hooks/TwitterDigest.pm =================================================================== --- trunk/cgi-bin/LJ/Hooks/TwitterDigest.pm (rev 0) +++ trunk/cgi-bin/LJ/Hooks/TwitterDigest.pm 2010-09-16 13:06:04 UTC (rev 9509) @@ -0,0 +1,28 @@ +package LJ::Hooks::TwitterDigest;++use strict;+use warnings;++use LJ::TwitterDigest;++LJ::register_hook('props_changed', sub {+ my ($u, $changes) = @_;++ if ($changes->{'timezone'}) {+ if ( $u->prop('twitter_access_token')+ && LJ::TwitterDigest->turned_on_for_user($u) )+ {+ LJ::TwitterDigest->set_next_post_time($u);+ }+ }++ if (exists $changes->{'twitter_access_token'}) {+ if ( !$changes->{'twitter_access_token'} ) {+ LJ::TwitterDigest->disable_for_user($u);+ } elsif ( LJ::TwitterDigest->turned_on_for_user($u) ) {+ 
LJ::TwitterDigest->set_next_post_time($u);+ }+ }+});++1; \ No newline at end of file Modified: trunk/cgi-bin/LJ/Setting/TwitterConnect.pm =================================================================== --- trunk/cgi-bin/LJ/Setting/TwitterConnect.pm 2010-09-16 08:36:33 UTC (rev 9508) +++ trunk/cgi-bin/LJ/Setting/TwitterConnect.pm 2010-09-16 13:06:04 UTC (rev 9509) @@ -4,6 +4,7 @@ use warnings; use LJ::Client::Twitter; +use LJ::TwitterDigest; sub should_render { my ( $class, $u ) = @_; @@ -62,6 +63,7 @@ 'twitter_link' => $u->prop('twitter_link'), 'repost_entries' => $repost_entries, 'repost_comments' => $repost_comments, + 'digest' => LJ::TwitterDigest->turned_on_for_user($u), 'is_identity' => $u->is_identity, ); @@ -99,6 +101,13 @@ $u->third_party_notify_list_remove('twitter-comments'); } + if ( $class->get_arg( $args, 'digest' ) ) { + LJ::TwitterDigest->set_next_post_time($u); + } + else { + LJ::TwitterDigest->turn_off_for_user($u); + } + return 1; } Added: trunk/cgi-bin/LJ/TwitterDigest.pm =================================================================== --- trunk/cgi-bin/LJ/TwitterDigest.pm (rev 0) +++ trunk/cgi-bin/LJ/TwitterDigest.pm 2010-09-16 13:06:04 UTC (rev 9509) @@ -0,0 +1,182 @@ +package LJ::TwitterDigest; +use strict; +use warnings; + +use LJ::SpamFilter; +use LJ::Client::Twitter; +use LJ::HTML::Template; +use LJ::TimeUtil; + +# note that this one turns the feature on if it was turned off before +sub set_next_post_time { + my ($class, $u) = @_; + + # do not set it within the next 15 minutes, as a primitive + # rate-limiting measure + my $ts = LJ::TimeUtil->next_afternoon( $u->prop("timezone") || 'GMT', + time + 900 ); + + my $dbh = LJ::get_db_writer(); $dbh->{'RaiseError'} = 1; + $dbh->do(qq{ + REPLACE INTO twitter_digest_status + SET userid=?, next_post_time=? 
+    }, undef, $u->id, $ts);
+}
+
+sub turn_off_for_user { # deletes the user's row from twitter_digest_status
+    my ($class, $u) = @_;
+
+    my $dbh = LJ::get_db_writer(); $dbh->{'RaiseError'} = 1;
+    $dbh->do(q{
+        DELETE FROM twitter_digest_status
+        WHERE userid=?
+    }, undef, $u->id);
+}
+
+sub turned_on_for_user { # returns 1 if the user has a row in twitter_digest_status, 0 otherwise
+    my ($class, $u) = @_;
+
+    my $dbr = LJ::get_db_reader(); $dbr->{'RaiseError'} = 1;
+    my ($userid) = $dbr->selectrow_array(qq{
+        SELECT userid
+        FROM twitter_digest_status
+        WHERE userid=?
+    }, undef, $u->id);
+
+    return defined $userid ? 1 : 0;
+}
+
+sub disable_for_user { # sets disabled=1 on the user's row; the row itself is kept
+    my ($class, $u) = @_;
+
+    my $dbh = LJ::get_db_writer(); $dbh->{'RaiseError'} = 1;
+    $dbh->do(q{
+        UPDATE twitter_digest_status
+        SET disabled=1
+        WHERE userid=?
+    }, undef, $u->id);
+}
+
+sub get_pending_user { # picks one user whose digest is due, locks them for 15 minutes, returns LJ::load_userid() of them; empty return if nobody is due
+    my ($class) = @_;
+
+    # we're using master here to avoid a replag; otherwise it could
+    # work so that a user is updated indicating that we posted their
+    # digest but then another process picks it up from a slave and
+    # posts their digest again
+    my $dbh = LJ::get_db_writer(); $dbh->{'RaiseError'} = 1;
+
+    # we're doing it in a transaction so that two threads never pick
+    # up the same user due to a race condition; after we get a user,
+    # it is locked for the next 15 minutes, and then the transaction
+    # is immediately committed, so the lock doesn't really block the
+    # other processes for a long time
+    my $tx = LJ::Transaction->new($dbh);
+
+    my ($userid) = $dbh->selectrow_array(qq{
+        SELECT userid
+        FROM twitter_digest_status
+        WHERE disabled = 0
+        AND next_post_time < UNIX_TIMESTAMP()
+        AND locked_until < UNIX_TIMESTAMP() LIMIT 1 FOR UPDATE
+    }); # rows flagged by disable_for_user are skipped; only one row is ever consumed, hence LIMIT 1
+
+    unless (defined $userid) {
+        $tx->rollback; # not that it matters because we didn't update
+                       # anything just yet, but let's not rely on the
+                       # destructor
+
+        return;
+    }
+
+    # let's lock them too
+    $dbh->do(qq{
+        UPDATE twitter_digest_status
+        SET locked_until = UNIX_TIMESTAMP() + 900
+        WHERE userid=?
+    }, undef, $userid);
+
+    $tx->commit;
+
+    return LJ::load_userid($userid);
+}
+
+my @weekdays_ml = # indexed by DateTime's wday_0, which counts from 0 = Monday to 6 = Sunday
+    map { "twitter_digest.weekday.$_" }
+    qw( mon tue wed thu fri sat sun );
+
+sub post_digest { # posts the user's tweets from the last 24 hours as one journal entry; returns the result of the 'postevent' request
+    my ($class, $u) = @_;
+
+    # this way, ML knows which language to use
+    LJ::set_remote($u);
+
+    my $tweets = LJ::Client::Twitter->user_last_tweets($u);
+
+    my @tweets_filtered = # oldest first, last 24 hours only
+        sort { $a->post_time <=> $b->post_time }
+        grep { $_->post_time > time - 86400 }
+        @$tweets;
+
+    my @tweets_display;
+    foreach my $tw (@tweets_filtered) {
+        my $dt = DateTime->from_epoch(
+            'epoch' => $tw->post_time,
+            'time_zone' => $u->prop('timezone') || 'GMT',
+        );
+
+        my $weekday = LJ::Lang::ml($weekdays_ml[$dt->wday_0]);
+
+        my $text = $tw->text_formatted;
+
+        if ( LJ::SpamFilter->is_spam($text) ) {
+            $text = LJ::Lang::ml('twitter_digest.spam');
+        }
+
+        push @tweets_display, {
+            'url' => $tw->url,
+            'time' => $weekday . ' ' . $dt->strftime('%H:%M'),
+            'text' => $text,
+        };
+    }
+
+    my $template = LJ::HTML::Template->new(
+        'scalarref' => \q{
+            <ul>
+            <TMPL_LOOP tweets>
+                <li>
+                    <a href="<TMPL_VAR url>"><em><TMPL_VAR time></em></a>:
+                    <TMPL_VAR text>
+                </li>
+            </TMPL_LOOP>
+            </ul>
+        },
+    );
+
+    $template->param( 'tweets' => \@tweets_display );
+
+    my $evt = $template->output;
+    $evt =~ s/\s+/ /sg; # collapse the template's layout whitespace into single spaces
+
+    my $res = LJ::Protocol::do_request('postevent', {
+        'ver' => 1,
+        'username' => $u->user,
+        'event' => $evt,
+        'subject' => LJ::Lang::ml('twitter_digest.subject'),
+        'props' => {
+            'taglist' => LJ::Lang::ml('twitter_digest.tags'),
+        },
+        'tz' => 'guess',
+
+        # let's not repost the digest back to twitter or facebook
+        'facebook_repost_aware' => 1,
+        'repost_facebook' => 0,
+        'twitter_repost_aware' => 1,
+        'repost_twitter' => 0,
+
+    }, \"", { 'noauth' => 1 }); # NOTE(review): \"" refers to a read-only constant; if do_request writes an error through it this would die -- confirm
+
+    return $res;
+}
+
+1;
Modified: trunk/templates/Settings/TwitterConnect.tmpl
===================================================================
--- trunk/templates/Settings/TwitterConnect.tmpl 2010-09-16 08:36:33 UTC (rev 9508)
+++ 
trunk/templates/Settings/TwitterConnect.tmpl 2010-09-16 13:06:04 UTC (rev 9509) @@ -5,6 +5,10 @@ (<a href="<TMPL_VAR disconnect_link>"><TMPL_VAR expr="ml('setting.twitterconnect.link.remove')"></a>) </p> <ul class="b-manage-connectopt"> + <li> + <input type="checkbox" name="<TMPL_VAR form_field_prefix>digest" value="1" id="<TMPL_VAR form_field_prefix>digest"<TMPL_IF digest> checked="checked"</TMPL_IF> /> + <label for="<TMPL_VAR form_field_prefix>digest"><TMPL_VAR expr="ml('setting.twitterconnect.option.digest')"></label> + </li> <TMPL_IF is_identity> <li class="disabled"> <input type="checkbox" name="<TMPL_VAR form_field_prefix>repost_entries" value="1" id="<TMPL_VAR form_field_prefix>repost_entries" disabled="disabled" /> @@ -25,6 +29,7 @@ <TMPL_ELSE> <p class="b-manage-connectbtn"><button type="submit" name="<TMPL_VAR form_field_prefix>connect" value="<TMPL_VAR expr="ml('setting.twitterconnect.button.twitter_connect')">" class="b-connectbtn b-connectbtn-twitter" title="<TMPL_VAR expr="ml('setting.twitterconnect.button.twitter_connect')">"><span><i></i><TMPL_VAR expr="ml('setting.twitterconnect.button.twitter_connect')"></span></button></p> <ul class="b-manage-connectopt"> + <li class="disabled"><input type="checkbox" name="<TMPL_VAR form_field_prefix>digest" value="1" id="<TMPL_VAR form_field_prefix>digest" disabled="disabled"<TMPL_IF digest> checked="checked"</TMPL_IF>> <label for="<TMPL_VAR form_field_prefix>digest"><TMPL_VAR expr="ml('setting.twitterconnect.option.digest')"></label></li> <li class="disabled"><input type="checkbox" name="<TMPL_VAR form_field_prefix>repost_entries" value="1" id="<TMPL_VAR form_field_prefix>repost_entries" disabled="disabled"<TMPL_IF repost_entries> checked="checked"</TMPL_IF>> <label for="<TMPL_VAR form_field_prefix>repost_entries"><TMPL_VAR expr="ml('setting.twitterconnect.option.entries_twitter')"></label></li> <li class="disabled"><input type="checkbox" name="<TMPL_VAR form_field_prefix>repost_comments" value="1" 
id="<TMPL_VAR form_field_prefix>repost_comments" disabled="disabled"<TMPL_IF repost_comments> checked="checked"</TMPL_IF>> <label for="<TMPL_VAR form_field_prefix>repost_comments"><TMPL_VAR expr="ml('setting.twitterconnect.option.comments_twitter')"></label></li> </ul>