#!/bin/env perl use warnings; use strict; use HTML::Parser; # Prepare parser my $parser=HTML::Parser->new( api_version => 3, strict_comment => 1, unbroken_text => 1, utf8_mode => 1, start_h => [\&c_start, "tagname, attr, text"], end_h => [\&c_end, "tagname"], text_h => [\&c_text, "dtext"], comment_h => [\&c_comment, "token0"] ); my $text; my $comment; my $regtext; my $uidtext; my $after_sekce_abc=0; my $reg_p=0; # Parse profile web page $parser->parse_file(*STDIN) || die "HTML parser failed: $!"; sub c_start { my ($tagname, $attr, $origtext) = @_; if ($tagname eq 'p' && $after_sekce_abc && !$reg_p) { $reg_p++; } if ($tagname eq 'a' && defined $attr->{href} && $attr->{href} =~ /^\/muj_obsah\// && $after_sekce_abc) { $uidtext=$attr->{href}; $parser->eof; } } sub c_end { my ($tagname, $attr) = @_; if ($tagname eq 'p' && $reg_p == 1) { $reg_p++; #$after_sekce_abc=0; } } sub c_text { my ($text) = @_; if ($reg_p == 1) { if ($text =~ /Datum registrace/) { # Registraion data found, terminating parser $regtext=$text; } else { # Between comment and registration can be e.g. footer $reg_p--; } } } sub c_comment { my ($comment) = @_; if ($comment eq ' sekce abclinuxu ') { $after_sekce_abc=1; } } # print UID if ($uidtext =~ /\/(\d+)/) { print "$1\t"; } else { print "-1\t"; } # print registration date if ($regtext =~ /(\d+)\.\s*(\d+)\.\s*(\d+)/ ) { printf "%d-%02d-%02d\n", $3, $2, $1; } else { print "?\n" }