#!/bin/env perl
use warnings;
use strict;
use HTML::Parser;
# Prepare parser
my $parser=HTML::Parser->new(
api_version => 3,
strict_comment => 1,
unbroken_text => 1,
utf8_mode => 1,
start_h => [\&c_start, "tagname, attr, text"],
end_h => [\&c_end, "tagname"],
text_h => [\&c_text, "dtext"],
comment_h => [\&c_comment, "token0"]
);
my $text;
my $comment;
my $regtext;
my $uidtext;
my $after_sekce_abc=0;
my $reg_p=0;
# Parse profile web page
$parser->parse_file(*STDIN) || die "HTML parser failed: $!";
sub c_start {
my ($tagname, $attr, $origtext) = @_;
if ($tagname eq 'p' && $after_sekce_abc && !$reg_p) {
$reg_p++;
}
if ($tagname eq 'a' && defined $attr->{href} &&
$attr->{href} =~ /^\/muj_obsah\// && $after_sekce_abc) {
$uidtext=$attr->{href};
$parser->eof;
}
}
sub c_end {
my ($tagname, $attr) = @_;
if ($tagname eq 'p' && $reg_p == 1) {
$reg_p++;
#$after_sekce_abc=0;
}
}
sub c_text {
my ($text) = @_;
if ($reg_p == 1) {
if ($text =~ /Datum registrace/) {
# Registraion data found, terminating parser
$regtext=$text;
} else {
# Between comment and registration can be e.g. footer
$reg_p--;
}
}
}
sub c_comment {
my ($comment) = @_;
if ($comment eq ' sekce abclinuxu ') {
$after_sekce_abc=1;
}
}
# print UID
if ($uidtext =~ /\/(\d+)/) {
print "$1\t";
}
else {
print "-1\t";
}
# print registration date
if ($regtext =~ /(\d+)\.\s*(\d+)\.\s*(\d+)/ ) {
printf "%d-%02d-%02d\n", $3, $2, $1;
} else {
print "?\n"
}