get it up and running basically on a test file
[spider.git] / perl / ARRL / DX.pm
1 #
2 # (optional) ARRL Dx Database handling
3 #
4 # $Id$
5 #
6 # Copyright (c) 2005 Dirk Koopman G1TLH
7 #
8
9 use strict;
10
11 package ARRL::DX;
12
13 use vars qw($VERSION $BRANCH $dbh $dbname %tabledefs $error);
14
15 main::mkver($VERSION = q$Revision$) if main->can('mkver');
16
17 use DXLog;
18 use DXDebug;
19 use DXUtil;
20 use DBI;
21 use IO::File;
22 use Date::Parse;
23
24 $dbname = "$main::root/data/arrldx.db";
25 %tabledefs = (
26                           paragraph => 'CREATE TABLE paragraph(p text, t int, bullid text)',
27                           paragraph_t_idx => 'CREATE INDEX paragraph_t_idx ON paragraph(t DESC)',
28                           refer => 'CREATE TABLE refer(r text, rowid int, t int, pos int)',
29                           refer_id_idx => 'CREATE INDEX refer_id_idx ON refer(rowid)',
30                           refer_t_idx => 'CREATE INDEX refer_t_idx ON refer(t DESC)',
31                          );
32
33 sub new
34 {
35         my $pkg = shift;
36         my $class = ref $pkg || $pkg;
37         my %args = @_;
38         
39         $error = undef;
40         
41         unless ($dbh) {
42                 $dbh = DBI->connect("dbi:SQLite:dbname=$dbname", "", "");
43                 unless ($dbh) {
44                         dbg($DBI::errstr);
45                         Log('err', $DBI::errstr);
46                         $error = $DBI::errstr;
47                         return;
48                 }
49                 
50                 # check that all the tables are present and correct
51                 my $sth = $dbh->prepare("select name,type from sqlite_master") or $error = $DBI::errstr, return;
52                 $sth->execute or $error = $DBI::errstr, return;
53                 my %f;
54                 while (my @row = $sth->fetchrow_array) {
55                         $f{$row[0]} = $row[1];
56                 }
57                 foreach my $t (sort keys %tabledefs) {
58                         $dbh->do($tabledefs{$t}) unless exists $f{$t};
59                 }
60                 $sth->finish;
61         }
62
63         my $self = {};
64         
65         if ($args{file}) {
66                 if (ref $args{file}) {
67                         $self->{f} = $args{file};
68                 } else {
69                         $self->{f} = IO::File->new($args{file}) or $error = $!, return;
70                 }
71         } 
72         
73         return bless $self, $class; 
74 }
75
76 sub process
77 {
78         my $self = shift;
79
80         return unless $self->{f};
81         
82         my $state;
83         my $count;
84         
85         $dbh->begin_work;
86         my $f = $self->{f};
87         while (<$f>) {
88 #               print;
89                 unless ($state) {
90                         $state = 'ZC' if /^ZCZC/; 
91                 } elsif ($state eq 'ZC') {
92                         if (/\b(ARLD\d+)\b/) {
93                                 $self->{id} = $1;
94                                 $state = 'id';
95                         }
96                 } elsif ($state eq 'id') {
97                         if (/^Newington\s+CT\s+(\w+)\s+(\d+),\s+(\d+)/i) {
98                                 $state = 'date' ;
99                                 $self->{date} = str2time("$1 $2 $3") if $state eq 'date';
100                         }
101                 } elsif ($state eq 'date') {
102                         if (/^$self->{id}/) {
103                                 last unless /DX\s+[Nn]ews\s*$/;
104                                 $state = 'week'; 
105                         }
106                 } elsif ($state eq 'week') {
107                         $state = 'weekro' if /^This\s+week/;
108                 } elsif ($state eq 'weekro') {
109                         if (/^\s*$/) {
110                                 $state = 'para';
111                                 $self->{para} = "";
112                         }
113                 } elsif ($state eq 'para') {
114                         if (/^\s*$/) {
115                                 if ($self->{para}) {
116                                         $self->{para} =~ s/^\s+//;
117                                         $self->{para} =~ s/\s+$//;
118                                         $self->{para} =~ s/\s+/ /g;
119                                         $self->insert;
120                                         $self->{para} = "";
121                                         $count++;
122                                 }
123                         } elsif (/^THIS\s+WEEKEND/) {
124                                 last;
125                         }
126                         chomp;
127                         s/^\s+//;
128                         s/\s+$//;
129                         $self->{para} .= $_ . ' ';
130                 }
131         }
132         $dbh->commit;
133         $self->{f}->close;
134         delete $self->{f};
135         return $count;
136 }
137
138 sub insert
139 {
140         my $self = shift;
141         my $sth = $dbh->prepare("insert into paragraph values(?,?,?)");
142         $sth->execute($self->{para}, $self->{date}, $self->{id});
143         my $lastrow = $dbh->func('last_insert_rowid');
144 }
145
146 sub close
147 {
148         $dbh->disconnect;
149         undef $dbh;
150 }
151 1;