added a 'depth' to the matchprefix thing, to allow matching as high
[spider.git] / perl / Prefix.pm
index 005d3309020ec41cf61093fc8ec33aa0e91c0e60..cb62626958aed98979681ca8be6ec2ddbf9a08c6 100644 (file)
@@ -136,16 +136,32 @@ sub next
 sub matchprefix
 {
        my $pref = shift;
+       my @partials;    # leading substrings of $pref tried in this call that were not in %cache
 
        for (my $i = length $pref; $i; $i--) {
+               $matchtotal++;    # every probe counts towards the hit-rate figure reported below
                my $s = substr($pref, 0, $i);
-               my @out = get($s);
-               if (isdbg('prefix')) {
-                       my $part = $out[0] || "*";
-                       $part .= '*' unless $part eq '*' || $part eq $s;
-                       dbg("Partial prefix: $pref $s $part" );
-               } 
-               return @out if @out && $out[0] eq $s;
+               my $p = $cache{$s};    # consult %cache before falling back to get()
+               if ($p) {
+                       $hits++;
+                       if (isdbg('prefix')) {
+                               my $percent = sprintf "%.1f", $hits * 100 / $matchtotal;    # hit rate, one decimal place
+                               dbg("Partial Prefix Cache Hit: $s Hits: $hits of $matchtotal = $percent\%");
+                       }
+                       return @$p;    # cache hit: hand back the stored list
+               } else {
+                       push @partials, $s;    # remember the miss so it can be cached if a match turns up
+                       my @out = get($s);
+                       if (isdbg('prefix')) {
+                               my $part = $out[0] || "*";
+                               $part .= '*' unless $part eq '*' || $part eq $s;
+                               dbg("Partial prefix: $pref $s $part" );
+                       } 
+                       if (@out && $out[0] eq $s) {    # accept only when the first element from get() equals $s
+                               $cache{$_} = \@out for @partials;    # every substring tried (including $s) shares this one array ref
+                               return @out;
+                       } 
+               }
        }
        return ();
 }
@@ -169,11 +185,9 @@ sub extract
        # clear out the cache periodically to stop it growing for ever.
        if ($main::systime - $lasttime >= 15*60) {
                if (isdbg('prefix')) {
-                       my $percent = $hits * 100 / $matchtotal;
-                       dbg("Prefix Cache Cleared, Hits: $hits of $matchtotal = $percent\%") 
+                       my $percent = sprintf "%.1f", $hits * 100 / $matchtotal;
+                       dbg("Prefix Cache Cleared, Hits: $hits of $matchtotal = $percent\%") ;
                }
-               my $percent = $hits * 100 / $matchtotal;
-               dbg("Prefix Cache Cleared, $percent\% hits") if isdbg('prefix');
                %cache =();
                $lasttime = $main::systime;
                $hits = $matchtotal = 0;
@@ -189,8 +203,8 @@ LM: foreach $call (split /,/, $calls) {
                if ($p) {
                        $hits++;
                        if (isdbg('prefix')) {
-                               my $percent = $hits * 100 / $matchtotal;
-                               dbg("Prefix Cache Hit: $call Hits: $hits of $matchtotal = $percent\%") 
+                               my $percent = sprintf "%.1f", $hits * 100 / $matchtotal;
+                               dbg("Prefix Cache Hit: $call Hits: $hits of $matchtotal = $percent\%");
                        }
                        push @out, @$p;
                        next;
@@ -272,7 +286,7 @@ LM: foreach $call (split /,/, $calls) {
                if (@parts == 1) {
                        @nout = matchprefix($parts[0]);
                        if (@nout) {
-                               dbg("got prefix: $call ]") if isdbg('prefix');
+                               dbg("got prefix: $call = $nout[0]") if isdbg('prefix');
                                $cache{$call} = \@nout;
                                push @out, @nout;
                                next;
@@ -337,7 +351,7 @@ L1:         for ($n = 0; $n < @parts; $n++) {
                push @out, @nout;
        }
        
-       if (isdbg('prefix')) {
+       if (isdbg('prefixdata')) {
                my $dd = new Data::Dumper([ \@out ], [qw(@out)]);
                dbg($dd->Dumpxs);
        }