#!/usr/bin/perl # Copyright © 2011, 2012, 2013, 2014 Petr Písař # This is free software. You may redistribute copies of it under the terms of # the GNU General Public License . # There is NO WARRANTY, to the extent permitted by law. # Changelog: # # Version 20 # - Catch up to major changes on the server side. Not all features are # available yet. # # Version 19 # - Send `type' element not to confuse SMIL playlist genererator as it is # needed since 2014-05-05. # # Version 18 # - Fix locating JSON. The affected RTMP variant got another argument. # # Version 17 # - Fix request for RTMP playlist generator by not sending undefined # streamQuality. # # Version 16 # - Recognize an error message in new format. # - Respect *_proxy environment variables. # # Version 15 # - Implement short path when overloaded server returns SMIL URL directly. # # Version 14 # - Adapt to server changes effective since 2014-02-13. Only RTMP is supported # now. The Apple format is unsupported. # - Require URI::Escape Perl module. # # Version 13 # - Rewrite SMIL playlist URL as performed by web interface since 2014-01-31. # # Version 12 # - Search SMIL playlist generator URL from web pages as the address changes # frequently. # # Version 11 # - Implement change of SMIL playlist generator URL as spotted on 2014-01-10. # Thanks to Jiri B. # - Set output encoding for diganostic messages according to locale as # messages extracted from web pages are in Czech. # # Version 10 # - Implement change of SMIL playlist generator URL as spotted on 2014-01-07. # # Version 9 # - Search for other error message if JSON could not be found. This usually # says the client is not supported (not all URLs serves all formats). # - Improve JSON locator # (http://www.ceskatelevize.cz/ivysilani/1126672097-otazky-vaclava-moravce/). # # Version 8 # - Adapt to changes in SMIL play-list generator on the Czech Television # servers. Effective since 2013-05-24. # - New option `-v' has been added to report retrieved URLs. # # Version 7 # - Print stream data in binaru mode. This fixes premature end-of-file on # DOS-like operating systems. # # Version 6 # - Accept iframe player URL with channel name in the file name # - Send XML-RPC header needed for SMIL generator since 2012-05-09 # # Version 5 # - Support Apple M3U and MPEG2-TS/MPEG-4 format # - Improve HTML parser # - Remove autoStart=false option from iframe player URL to get page with # JSON instead of similar page with iframe again # (http://www.ceskatelevize.cz/porady/10316155327-horizont-ct24/) # # Version 4 # - Support RTMP app with slashes # - Report URI in error messages # # Version 3: # - Output in rtmpdump(1) format if `-d' option is specified # - Do not append playpath to URL if ambigous # # Version 2: # - Output playpath as librtmp option if necessary # (http://www.ct24.cz/vysilani/10099403120-kultura-v-regionech/) # - Perl 5.10 support # - Find JSON via iframe first and fall back to direct JSON # - More general example entry page URL in usage output # - Show content provider error message if exists # use strict; use warnings; use utf8; use open ':locale'; our $VERSION = 20; use LWP::UserAgent; use HTTP::Request::Common; use HTTP::Response; use XML::XPath; use URI; use URI::Escape; use JSON 2.0; use Getopt::Std; use IO::Handle; my $SMIL_GENERATOR_NEXT = '/ajax/getSmil.php?url='; my $ENTRY = 'http://www.ceskatelevize.cz/ivysilani/zive/ct24/'; sub usage { return<). If this is the only argument, output list of all available streams in format `STREAM_BITRATE: URL' separated by new line. If STREAM_BITRATE is given, output URL of the stream with given rate only. By default, URL of RTMP/FLV/MPEG-4 stream is printed in librtmp(3) format (space-separated librtmp options may follow the URL and all of them must be passed as one argument to librtmp application). If `-d' option is specified, URL with possible arguments are printed as rtmpdump(1) arguments. Note ampersands are kept literal (this should work in simple subshell substition). If `-A' option is specified, HTTP/MPEG-TS/MPEG-4 video will be retrieved. There exist three levels selected by second option: -l Apple M3U play-list pointing to unbound stream segments will be output. This is handy if you have a player that supports the very special play-list. Specification can be found on . -f The Apple play-list will be processed and URLs of the underlying stream segmented to files will be printed. Due to nature of the play-list, the locators are printed periodically, a new segment a few seconds, possibly in endless loop. Intended workflow is to pipe the URLs to an HTTP client which echoes downloaded segments on standard input of a multimedia player. This is the default level. The bit rate must be specified, if more bit rates are available. -s The stream segments will be retrieved and dumped to standard output as continuous stream. You can pipe it your player. The bit rate must be specified, if more bit rates are available. If `-v' option is specified, additional debugging messages will be printed on standard error output. Version: $VERSION. Copyright © 2011, 2012, 2013, 2014 Petr Písař This is free software. You may redistribute copies of it under the terms of the GNU General Public License . There is NO WARRANTY, to the extent permitted by law. EOM } our ($opt_A, $opt_d, $opt_l, $opt_f, $opt_s, $opt_v); getopts('Adlfsv') or die usage; if ($#ARGV < 0 || $#ARGV > 1) { die "Bad invocation\n\n" . usage; } $ENTRY = $ARGV[0]; my $BITRATE = $ARGV[1]; if (!($opt_l || $opt_s)) { $opt_f = 1; } # each that operates on reference to array or hash # Works with perl 5.10.1 too. sub eachref { my $ref = shift; if (ref $ref eq 'HASH') { # Built-in implementation always supports HASH return sub { each %$ref; } } if (eval 'each @$ref' ) { # Built-in Perl 5.12 implementation eval 'return sub { each @$ref; }' } else { # Manual implementation of each ARRAY (needed for Perl < 5.12) my $index = -1; return sub { $index++; if ($index <= $#$ref) { ($index, $$ref[$index]); } else { (); } } } } # Convert nested JSON structure expressed as native hash reference into flat # array of key and value pairs. # E.g. { "x" => [ "y" => "1", "z" => undef ] } # becomes ( "x[0][y]", "1", "x[1][z] => null ). # This is handy when sending nested JSON structure as # application/x-www-form-urlencoded by HTTP::Request::Common. sub flatten { my ($ref, $prefix) = @_; my @output = (); my $doeach = eachref($ref); while (my ($key, $val) = &$doeach) { # TODO: Escape /[[]=]/ my $id = (defined $prefix) ? $prefix . '[' . $key . ']' : $key; if (ref $val eq 'HASH' || ref $val eq 'ARRAY') { push @output, flatten($val, $id); } else { push @output, ($id, $val // 'null'); } } return @output; } # Format RTMP URL for librtmp sub formaturl_librtmp { my ($rtmp, $app, $playpath) = @_; my $stream_url = $rtmp; if ($playpath =~ qr{/} or $app =~ qr{/}) { $stream_url .= ' app=' . $app . ' playpath=' . $playpath; } else { $stream_url .= $app . '/' . $playpath; } } # Format RTMP URL for librtmp sub formaturl_rtmpdump { my ($rtmp, $app, $playpath) = @_; my $stream_url = '--rtmp ' . $rtmp; if ($playpath =~ qr{/} or $app =~ qr{/}) { $stream_url .= ' --app ' . $app . ' --playpath ' . $playpath; } else { $stream_url .= $app . '/' . $playpath; } } # Find first pattern match in HTML page, HTML-unescape it and return it. # Otherwise return undef. sub htmlgrep { my ($html_page, $pattern) = @_; my ($text) = ($html_page =~ $pattern); if (defined $text) { $text =~ s/>/>/g; $text =~ s/</new(shift); s/&/%26/g; return $_; } # Try to get JSON request data from HTML page text. # The page is passed as a first argument. The JSON is located by javascript # function identifier passed as a second argument. # Return the JSON data or undef. # Example: callSOAP({"foo);bar":1}); sub findjson { my ($text, $function) = @_; htmlgrep($text, qr{ # Use possesive quantifiers ++, *+ for performance \Q$function\E \( ( # The JSON structure is a (?: # sequence of quoted strings "(?: [^"\\]++ | \\. )*+" | [^)] # and non-quoted )*+ # non-parentheses. ) # \); }x); } # Try to get setRequestHeader function arguments from JS page text passed as # argument. # Return list (header, value) or undef. sub findrequestheader { local $_ = findjson(shift, 'setRequestHeader'); if (!defined) { return undef; } return (m/'([^']*)', '([^']*)'/); } # Return array of { bitrate => INTEGER, url => URL } found in RTMP SMIL play # list. Arguments is playlist as string, URL of the playlist, # playlist as XML::XPath object and boolean signaling URL format (true for # rtmpdump format, false for librtmp format). sub extract_urls_from_rtmp_smil { my ($smil, $smil_url, $parser, $opt_d) = @_; my $videos = $parser->find('/data/smilRoot/body/switchItem/video[@enabled=true()]'); if ($videos->size <= 0) { die "No videos found in SMIL playlist <" . $smil_url . ">:\n" . $smil . "\n"; } my @bitrate_url_pairs = (); foreach my $video ($videos->get_nodelist) { my $suffix = $video->getAttribute('src'); if (! defined $suffix) { print STDERR q{Missing `video/@src' attribute} . "\n"; next; } my $prefix = $video->getParentNode->getAttribute('base'); if (! defined $suffix) { print STDERR q{Missing `video/../@base' attribute for video } . "`$suffix'\n"; next; } my $bitrate = $video->getAttribute('system-bitrate'); if (! defined $suffix) { print STDERR q{Missing `video/@system-bitrate' attribute for video } . "`$suffix'\n"; next; } # Build stream URL. Because RTMP URL can be ambigous, # applications accept aditional arguments separated by space # (the space must not be URI-encoded). my $stream_url; { my $rtmp = URI->new($prefix); my $app = substr($rtmp->path_query, 1); $rtmp->path('/'); $rtmp->query(undef); my $playpath = URI->new($suffix); if ($opt_d) { $stream_url = formaturl_rtmpdump($rtmp, $app, $playpath); } else { $stream_url = formaturl_librtmp($rtmp, $app, $playpath); } } ## Build stream URL. This is now an HTTP URL. #my $stream_url = URI->new_abs($suffix, $prefix); # Store URL push @bitrate_url_pairs, {'bitrate' => $bitrate, 'url' => $stream_url}; } return @bitrate_url_pairs; } # Print segment URLs or their content found in bottom-level Apple M3U # play-list. This function can never return if live stream is served by # a server. # Arguments are URL of the playlist, LWP::UserAgent object, and boolean # signalling content of stream segments should be printed instead of their # URLs. # See . sub iterate_bottom_apple_m3u { my ($m3u_url, $ua, $stream_content) = @_; autoflush STDOUT 1; my $reload = 1; my $last_segment = -1; my $target_duration; while ($reload) { # Get bottom-level Apple MPEG play-list. my $response = $ua->request(GET $m3u_url); $response->is_success or die "Could not get bottom-level Apple M3U play-list from <" . $m3u_url . ">: " . $response->status_line . "\n"; my $retrieved_at = time; my $duration; my $sequence = 0; for (split(/(\r)?\n/, $response->decoded_content)) { if (! defined) { next; } if (/\A#EXT-X-TARGETDURATION:(\d+)/) { $target_duration = $1; next; } if (/\A#EXT-X-MEDIA-SEQUENCE:(\d+)/) { $sequence = $1; next; } if (/\A#EXT-X-ENDLIST\b/) { $reload = 0; next; } if (/\A#EXTINF:(\d+)/) { $duration = $1; next; } if (/\A[^#]/) { if (! defined $duration) { print STDERR 'Stray URL in bottom-level Apple play-list from <' . $m3u_url . ">:\n" . $response->decoded_content . "\n"; next; } if ($sequence > $last_segment) { my $segment_url = URI->new_abs($_, $m3u_url); if ($stream_content) { # Get segment content. binmode STDOUT; $ua->set_my_handler('response_data', sub { print $_[3]; 1; }, 'm_code' => 2); my $response = $ua->request(GET $segment_url); $response->is_success or die "Could not get stream segment content from <" . $segment_url . ">: " . $response->status_line . "\n"; } else { print $segment_url, "\n"; } $last_segment = $sequence; } $duration = undef; $sequence++; next; } } if (! defined $target_duration || $last_segment == -1) { die 'No target duration or URL found in bottom-level Apple " . "play-list from <' . $m3u_url . ">:\n" . $response->decoded_content . "\n"; } if ($reload) { my $sleep = $target_duration - (time - $retrieved_at); if ($sleep > 0) { sleep $sleep; } } } } # Return array of { bitrate => INTEGER, url => URL } found in top-level # Apple M3U play-list. Returned URLs are locators of bottom-level Apple M3U # play-list for given bitrate. The play-list specification is on # . # Arguments are URL of the playlist and LWP::UserAgent object. sub extract_urls_from_top_apple_m3u { my ($m3u_url, $ua) = @_; my @bitrate_url_pairs = (); # Get top-level Apple MPEG playlist. my $response = $ua->request(GET $m3u_url); $response->is_success or die "Could not get top-level Apple M3U play-list from <" . $m3u_url . ">: " . $response->status_line . "\n"; my $bitrate; for (split(/(\r)?\n/, $response->decoded_content)) { if (! defined) { next; } if (/\A#EXT-X-STREAM-INF:(?:.*,)?BANDWIDTH=(\d+)/) { $bitrate = $1; next; } if (/\A[^#]/) { if (! defined $bitrate) { print STDERR 'Stray URL in top-level Apple play-list from <' . $m3u_url . ">:\n" . $response->decoded_content . "\n"; next; } push @bitrate_url_pairs, {'bitrate' => $bitrate, 'url' => URI->new_abs($_, $m3u_url)}; $bitrate = undef; next; } } if ($#bitrate_url_pairs < 0) { die 'No URL found in top-level Apple play-list from <' . $m3u_url . ">:\n" . $response->decoded_content . "\n"; } return @bitrate_url_pairs; } # Return array of { bitrate => INTEGER, url => URL } found in Apple SMIL play # list. Arguments is playlist as string, URL of the playlist, # playlist as XML::XPath object, and LWP::UserAgent object. sub extract_urls_from_apple_smil { my ($smil, $smil_url, $parser, $ua) = @_; my $videos = $parser->find('/data/smilRoot/body/video'); if ($videos->size <= 0) { die "No videos found in SMIL playlist <" . $smil_url . ">:\n" . $smil . "\n"; } my @bitrate_url_pairs = (); foreach my $video ($videos->get_nodelist) { my $m3u_url = $video->getAttribute('src'); if (! defined $m3u_url) { print STDERR q{Missing `video/@src' attribute} . "\n"; next; } push @bitrate_url_pairs, extract_urls_from_top_apple_m3u($m3u_url, $ua); } return @bitrate_url_pairs; } # Return array of { bitrate => INTEGER, url => URL } found in # M3U play-list. Returned URLs are media URLs, bit-rates are dummy numbers. # Arguments are URL of the playlist and LWP::UserAgent object. sub extract_urls_from_m3u { my ($m3u_url, $ua) = @_; my @bitrate_url_pairs = (); # Get the playlist. my $response = $ua->request(GET $m3u_url); $response->is_success or die "Could not M3U play-list from <" . $m3u_url . ">: " . $response->status_line . "\n"; my $order = -1; my $separated; for (split(/(\r)?\n/, $response->decoded_content)) { if (! defined) { next; } if (/\A#EXTINF:/) { $order++; $separated = 1; next; } if (/\A[^#]/) { if (! defined $separated) { print STDERR 'Stray URL in M3U play-list from <' . $m3u_url . ">:\n" . $response->decoded_content . "\n"; next; } push @bitrate_url_pairs, {'bitrate' => $order, 'url' => URI->new_abs($_, $m3u_url)}; $separated = undef; next; } } if ($#bitrate_url_pairs < 0) { die 'No URL found in M3U play-list from <' . $m3u_url . ">:\n" . $response->decoded_content . "\n"; } return @bitrate_url_pairs; } # Get entry HTML page my $ua = LWP::UserAgent->new; $ua->env_proxy(); if ($opt_A) { $ua->agent('Mozilla/5.0(iPad; U; CPU iPhone OS 3_2 like Mac OS X; en-us) ' . 'AppleWebKit/531.21.10 (KHTML, like Gecko) ' . 'Version/4.0.4 Mobile/7B314 Safari/531.21.10'); } if ($opt_v) { print STDERR "Getting <$ENTRY>\n"; } my $response = $ua->request(GET $ENTRY); $response->is_success or die "Could not get entry page from <" . $ENTRY . ">: " . $response->status_line . "\n"; my $page = $response->decoded_content; # Try to get SMIL URL from clientLink data structure my $smil_url; my $smil_url_is_m3u; my $client_link_code = htmlgrep($page, qr{var\s+clientLink\s+=\s+\{\s+([^\}]+)\}}); if (defined $client_link_code) { if ($opt_v) { print STDERR "Client link code found:\n"; print STDERR "===BEGIN===\n$client_link_code\n===END===\n"; } my %client_links = map({ if (/^(\w+)\s+:\s+'([^']+)'/) { ($1 => $2) } else { () } } split(/,\R/, $client_link_code)); if ($opt_A) { $smil_url = $client_links{ipad}; # FIXME: This is not everthing for CT4. CT4 has SMIL generator at # . Input for the # generator is this client_link. Or it isn't. I get 403 evend from in # the web browser. This looks like a server issue. #$smil_url .= '&rd=1'; } else { # Server does not support multiple bitrates via RTSP for live # streaming anymore. # # There is only one a low bit-rate RTSP for Android clients. And the # link is server as an M3U playlist. # The Adobe Flash Player client does not use RTSP anymore. It uses # "San Jose/Flash HTTP" with an intemediate binary manifest.f4m. # Because it delivers the same chunks as the Apple play list, user can # use the Apple play list instead. However the recorded programs are # still available with the old RTSP method. They have no client links, # one needs to use the SMIL playlist generator for them. $smil_url_is_m3u = 1; $smil_url = $client_links{android}; } if (defined $smil_url) { if ($opt_v) { print STDERR "Client link <$smil_url> selected.\n"; } goto HAVE_SMIL_URL; } } # Get AJAX request data my $ajax_data = findjson($page, 'getPlaylistUrl'); unless (defined $ajax_data && $ajax_data) { # Overloaded server returns SMIL URL directly. Check it here before # printing error message. $smil_url = htmlgrep($page, qr{(?(?:\s*<[^/>]*>)*(.*?)(?:}{\n}g; if ($message) { die "$message\n"; } } # else die in general way if ($opt_v) { print STDERR "===BEGIN INPUT===\n$page\n===END INPUT===\n"; } die "Could not find AJAX data structure\n"; } # The JSON structure is the first argument of getPlaylistUrl() which has # variadic number of arguments. $ajax_data =~ s/(?<=[{\]])(,[^,]*)*\z//; # Build 'data' AJAX structure from AJAX script and iframe sources # 'requestSource' and 'type' elements are constants defined in the iframe. # 'streamQuality' element is a parsed from $ENTRY URL query segment, but it is # not used now. Moreover it can be undefined and then jQuery will not # transport the key with undefined value. # 'addCommercials' element is defined only sometimes in the ifram source and # not needed now. # XXX: Server checks for unexpected keys or values and bails out with 500. Do # not send undefined streamQuality or addCommercials. my $requestUrl = URI->new($ENTRY)->path; my $json_data = qq( { "playlist" : $ajax_data, "requestUrl" : "$requestUrl", "requestSource" : "iVysilani", "type" : "flash" } ); # Get XML-RPC header definition my ($xmlrpc_header, $xmlrpc_value) = findrequestheader($page); if (!defined $xmlrpc_header or !defined $xmlrpc_value) { if ($opt_v) { print STDERR "===BEGIN INPUT===\n$page\n===END INPUT===\n"; } print STDERR "XML-RPC header definition not found.\n"; } elsif ($opt_v) { print STDERR "XML-RPC header found: <$xmlrpc_header>\n"; print STDERR "XML-RPC value found: <$xmlrpc_value>\n"; } # Decode JSON request data my $data; eval { $data = decode_json($json_data) } or die "Could not decode JSON string: $json_data: $@\n"; my @data = flatten($data); # Get callSOAP source URL my $script_url = htmlgrep($page, qr{