@@ -7,8 +7,8 @@ use utf8;
77use Redis;
88use File::Find;
99
10- use LANraragi::Utils::Generic qw( remove_spaces remove_newlines is_archive) ;
11- use LANraragi::Utils::Database qw( redis_decode) ;
10+ use LANraragi::Utils::Generic qw( remove_spaces remove_newlines is_archive trim_url ) ;
11+ use LANraragi::Utils::Database qw( redis_decode redis_encode ) ;
1212use LANraragi::Utils::Logging qw( get_logger) ;
1313
1414sub get_archive_count {
@@ -43,44 +43,90 @@ sub get_page_stat {
4343 return $stat ;
4444}
4545
46- sub build_tag_json {
47-
48- my $t ;
49- my @tags ;
50- my %tagcloud ;
46+ # This operation builds two Redis keys: LRR_URLMAP, a hash which maps URLs to the IDs of the archives in the database that have them as a source: tag,
47+ # and LRR_STATS, which is a sorted set used to build the statistics/tag cloud JSON.
# Rebuild the two stat indexes stored in Redis:
#   LRR_URLMAP - hash mapping the URL of every "source:" tag to the ID of an archive carrying it.
#   LRR_STATS  - sorted set whose members are lowercased tags and whose scores are usage counts.
# Takes no parameters; the return value is not meaningful.
sub build_stat_hashes {

    # This method does only one atomic write transaction, using Redis' watch/multi mode.
    # But we can't use the connection to get other data while it's in transaction mode!
    # So we instantiate a second connection to get the data we need.
    my $redis   = LANraragi::Model::Config->get_redis;
    my $redistx = LANraragi::Model::Config->get_redis;
    my $logger  = get_logger( "Tag Stats", "lanraragi" );

    # 40-character long keys only => Archive IDs
    my @keys = $redis->keys( '?' x 40 );

    # Cancel the transaction if the hashes have been modified by another job in the meantime.
    # This also allows for the previous stats/map to still be readable until we're done.
    $redistx->watch( "LRR_STATS", "LRR_URLMAP" );
    $redistx->multi;
    $redistx->del("LRR_STATS");
    $redistx->del("LRR_URLMAP");

    # Iterate on hashes to get their tags
    $logger->debug("Building stat indexes...");
    foreach my $id (@keys) {

        # One HGET is enough: it yields undef when the archive has no "tags" field,
        # so a separate HEXISTS round trip is not needed.
        my $rawtags = $redis->hget( $id, "tags" );
        next unless defined $rawtags;

        # Split tags by comma
        my @tags = split( /,\s?/, redis_decode($rawtags) );

        foreach my $t (@tags) {
            remove_spaces($t);
            remove_newlines($t);

            # Stray commas produce empty tags; they carry no information for the stats.
            next if $t eq "";

            # If the tag is a source: tag, add it to the URL index.
            # The match is anchored on the namespace so that tags merely containing
            # "source:" (e.g. "resource:foo") are not mistaken for URLs.
            if ( $t =~ /\A\s*source:(.*)/i ) {
                my $url = $1;
                $logger->debug("Adding $url as an URL for $id");
                trim_url($url);
                $logger->debug("Trimmed: $url");

                # No need to encode the value, as URLs are already encoded by design
                $redistx->hset( "LRR_URLMAP", $url, $id );
            }

            # Increment tag in stats, all lowercased here to avoid redundancy/dupes
            $redistx->zincrby( "LRR_STATS", 1, redis_encode( lc($t) ) );
        }
    }

    # Everything queued since multi is applied here in one go.
    $redistx->exec;
    $logger->debug("Done!");
    $redis->quit;
    $redistx->quit;
}
100+
# Look up a URL in the LRR_URLMAP hash.
# Parameters:
#   $url - the URL to look for; it is passed through trim_url before the lookup.
# Returns the value stored for that URL in LRR_URLMAP (an archive ID), or 0 when
# the URL is not in the map.
sub is_url_recorded {

    my ($url) = @_;
    my $logger = get_logger( "Tag Stats", "lanraragi" );
    my $redis  = LANraragi::Model::Config->get_redis;
    $logger->debug("Checking if url $url is in the url map.");

    # Trim last slash from url if it's present
    trim_url($url);

    # A single HGET both tests for presence and fetches the value. Doing HEXISTS
    # first and HGET afterwards leaves a window in which the map can be deleted and
    # rebuilt by another job, making HGET return undef for a field that "existed".
    my $id = $redis->hget( "LRR_URLMAP", $url );
    $redis->quit;

    if ( defined $id ) {
        $logger->debug("Found! id $id.");
        return $id;
    }

    return 0;
}
119+
120+ sub build_tag_json {
121+
122+ my $logger = get_logger( " Tag Stats" , " lanraragi" );
123+
124+ # Login to Redis and grab the stats sorted set
125+ my $redis = LANraragi::Model::Config-> get_redis;
126+ my %tagcloud = $redis -> zrange( " LRR_STATS" , 0, -1, " WITHSCORES" );
81127 $redis -> quit();
82128
83- # Go through the tagCloud hash and build a JSON
129+ # Go through the data from stats and build a JSON
84130 my $tagsjson = " [" ;
85131
86132 for ( keys %tagcloud ) {
@@ -89,7 +135,7 @@ sub build_tag_json {
89135 # Split namespace
90136 # detect the : symbol and only use what's after it
91137 my $ns = " " ;
92- my $t = $_ ;
138+ my $t = redis_decode( $_ ) ;
93139 if ( $t =~ / (.*):(.*)/ ) { $ns = $1 ; $t = $2 ; }
94140
95141 if ( $_ ne " " ) {
0 commit comments