View unanswered posts    View active topics

All times are UTC - 6 hours





Post new topic Reply to topic  [ 19 posts ] 
Go to page Previous  1, 2

Print view Previous topic   Next topic  
Author Message
Search for:
 Post subject:
PostPosted: Mon Jul 05, 2010 9:41 pm 
Offline
Joined: Mon Feb 27, 2006 10:39 pm
Posts: 48
Found a replacement grabber that seems to work for me;

http://github.com/Jonty/Googlemovies/bl ... emovies.pl

Code:
#!/usr/bin/perl
use warnings;
use strict;

use LWP::Simple;
use HTML::Entities;
use HTML::TreeBuilder;
use XML::Writer;

# Base Google Movies URL; the location is appended as the "near" parameter.
# You shouldn't need to change this unless fetching totally fails.
# Just change the domain to your local google, i.e. google.com to google.de
my $googleurl = "http://www.google.com/movies?near=";

# Maximum number of result pages to fetch (decremented by parse_html).
# Set to 1 to fetch only first page of results
my $fetch_pages = 10;

# Otherwise we can get complaints when unicode is output
binmode STDOUT, ':utf8';

# Fetch the postcode/location to use from the args
# You can also use city name, "New York", "London"
my $location = join '+', @ARGV; # join args with '+' to be able to pass i.e. "New+York" in the url

if (!$location) {
    # Diagnostics go to STDERR so they never end up in the XML stream on
    # STDOUT, and the non-zero status lets the caller detect the failure.
    print STDERR "No postcode/location passed in arguments!\n";
    exit 1;
}

# The whole document is accumulated in $out and printed once at the end.
# XML::Writer captures into a string only when OUTPUT is a *reference* to
# that string, so pass \$out rather than the (empty) string itself.
my $out = '';
my $xml = XML::Writer->new(
    OUTPUT      => \$out,
    DATA_MODE   => 1,
    DATA_INDENT => 2,
);

$xml->xmlDecl();
$xml->startTag('MovieTimes');

# Result offset of the page being fetched; read by fetch_html and advanced
# by parse_navbar.
my $start = 0;
parse_html(fetch_html($googleurl.$location));

$xml->endTag(); # MovieTimes
$xml->end();

# Tada!
print $out;

# Download one page of results.
#
# Takes the page URL, appends the current result offset ($start) as a
# "start" query parameter and returns the page body as returned by
# LWP::Simple's get().
#
# On failure the script terminates: the message is written to STDERR (so it
# cannot be mistaken for XML on STDOUT) and the exit status is non-zero.
#
# Note: URLs produced by parse_navbar already carry a "start=" parameter, so
# follow-up pages end up with that parameter twice.
sub fetch_html {
    my $url = shift;

    my $response = get($url . '&start=' . $start);

    unless (defined $response) {
        print STDERR "Failed to fetch movie times, did you pass a valid postcode?\n";
        exit 1;
    }

    return $response;
}

# Parse one page of results and append its theaters to the XML document.
#
# Takes the HTML of a results page. For every <div class="theater"> a
# <Theater> element is written (details via parse_cinema), containing a
# <Movies> element filled by parse_movies from the theater's
# <div class="movie"> children.
#
# While the page budget ($fetch_pages) is not exhausted and parse_navbar
# finds a link to the next page, that page is fetched and parsed as well.
sub parse_html {
    my $tree = HTML::TreeBuilder->new();
    $tree->parse(shift);
    $tree->eof;

    foreach my $theater ($tree->look_down('_tag', 'div', class => 'theater')) {
        $xml->startTag('Theater');
        $xml = parse_cinema($xml, $theater);

        my @movierows = $theater->look_down('_tag', 'div', class => 'movie');
        $xml->startTag('Movies');
        $xml = parse_movies($xml, @movierows);
        $xml->endTag(); # Movies

        $xml->endTag(); # Theater
    }

    # Work out the next page while the tree is still alive ...
    my $next_url;
    if (--$fetch_pages > 0) {
        $next_url = parse_navbar($tree);
    }

    # ... then release it explicitly before recursing, so that at most one
    # parsed page is held in memory at a time.
    $tree->delete;

    parse_html(fetch_html($next_url)) if $next_url;

    return;
}

# Find the URL of the next page of results.
#
# Takes the parsed HTML tree of the current page and looks inside
# <div id="navbar"> for a link to /movies whose "start" parameter equals the
# current offset plus 10.
#
# Returns the absolute URL of that page and advances $start to the new
# offset, or returns undef (leaving $start untouched) when there is no such
# link.
sub parse_navbar {
    my $tree = shift;
    my $next_start = $start + 10;
    my $return_url;

    # Scheme and host of the configured Google URL (http or https), used to
    # turn relative links into absolute ones. If the pattern does not match,
    # the configured URL is used unchanged.
    (my $rooturl = $googleurl) =~ s{\A(https?://[^/]+).*\z}{$1}is;

    # look for a link with 'start=$next_start'
    if (my $navbar = $tree->look_down('_tag', 'div', id => 'navbar')) {
        foreach my $link ($navbar->look_down('_tag', 'a')) {
            my $href = $link->attr('href');
            next unless defined $href;    # anchors without href are not links

            # Accept relative and absolute links; "start" must be a complete
            # parameter name and value, wherever it sits in the query string.
            next unless $href =~ m{\A(?i:https?://[^/]+)?/movies\?(?:.*&)?start=\Q$next_start\E(?:&|\z)};

            $return_url = ($href =~ m{\Ahttps?://}i) ? $href : $rooturl . $href;
            $start = $next_start;
            last;
        }
    }

    return $return_url;
}

# Write the name and address of one theater.
#
# Parameters:
#   $xml    - writer object; receives dataElement('Name', ...) followed by
#             dataElement('Address', ...)
#   $cinema - element for one theater; the name is the text of its first
#             <h2 class="name">, the address that of its first
#             <div class="info">
#
# A missing element yields an empty value instead of aborting the run.
# Returns the writer that was passed in.
sub parse_cinema {
    my ($xml, $cinema) = @_;

    my @fields = (
        [ Name    => 'h2',  'name' ],
        [ Address => 'div', 'info' ],
    );

    foreach my $field (@fields) {
        my ($element, $tag, $class) = @{$field};

        my ($node) = $cinema->look_down('_tag', $tag, class => $class);
        my $text = defined $node ? $node->as_text : '';
        $text = '' unless defined $text;

        # Myth can't handle UTF8 nbsp. Remove the nbsp itself (plus the \xC2
        # lead byte when it directly precedes it) rather than every \xC2 and
        # \xA0 in the text, which would also eat characters such as "Â".
        $text =~ s/\xC2?\xA0//g;

        $xml->dataElement($element, $text);
    }

    return $xml;
}

# Write one <Movie> element per movie row.
#
# Parameters:
#   $xml - writer object
#   @_   - the remaining arguments are the movie row elements of a theater
#
# For every row the following is emitted inside <Movie>:
#   Name        - text of the first <div class="name">
#   Rating      - for each <img> whose alt text contains a digit, the alt
#                 text from that digit onwards (only if the row has info text)
#   RunningTime - first "<digits><word> <digits><word>" run in the text of
#                 the first <span class="info">
#   ShowTimes   - text of the first <div class="times"> with nbsp removed;
#                 everything from the first digit on has its whitespace runs
#                 turned into ", "
#
# Missing elements are treated as empty text instead of aborting the run.
# Returns the writer that was passed in.
sub parse_movies {
    my $xml = shift;

    foreach my $movierow (@_) {
        $xml->startTag('Movie');

        $xml->dataElement('Name', _movie_field_text($movierow, 'div', 'name'));

        my $info = _movie_field_text($movierow, 'span', 'info');
        if ($info) {
            foreach my $img ($movierow->look_down('_tag', 'img')) {
                my $alt = $img->attr('alt');
                next unless defined $alt;    # images without alt text carry no rating

                if ($alt =~ /(\d.*$)/) {
                    $xml->dataElement('Rating', $1);
                }
            }

            if ($info =~ /(\d+\w+\s*\d+\w+)/) {
                $xml->dataElement('RunningTime', $1);
            }
        }

        my $showtimes = _movie_field_text($movierow, 'div', 'times');
        if ($showtimes) {
            # Myth can't handle UTF8 nbsp. Remove the nbsp itself (plus the
            # \xC2 lead byte when it directly precedes it), not every \xC2
            # and \xA0 in the text.
            $showtimes =~ s/\xC2?\xA0//g;

            # Occasionally this line also contains information about subtitles
            # in front of the times. Only use the captures when the match
            # succeeded; text without any digit is passed through unchanged.
            if ($showtimes =~ /^(.*?)(\d.*$)/) {
                my ($note, $times) = ($1, $2);
                $times =~ s/\s+/, /g;
                $showtimes = $note . $times;
            }

            $xml->dataElement('ShowTimes', $showtimes);
        }

        $xml->endTag(); #Movie
    }

    return $xml;
}

# Text of the first <$tag class="$class"> below $row, or '' if there is none.
sub _movie_field_text {
    my ($row, $tag, $class) = @_;

    my ($node) = $row->look_down('_tag', $tag, class => $class);
    return '' unless defined $node;

    my $text = $node->as_text;
    return defined $text ? $text : '';
}



Top
 Profile  
 
 Post subject:
PostPosted: Tue Jul 06, 2010 10:26 am 
Offline
Joined: Mon Apr 10, 2006 3:48 pm
Posts: 997
Location: Lexington, Ky
Great find! It works great and is simple to implement.

In Settings just replace
Code:
Grabber:  /usr/bin/ignyte --zip %z --radius %r

with
Code:
Grabber:  /path_2_file/googlemovies.pl --zip %z --radius %r

and you're all set.

Thanks huntermcdole!


Top
 Profile  
 
 Post subject:
PostPosted: Tue Jul 06, 2010 11:09 am 
Offline
Joined: Mon Feb 27, 2006 10:39 pm
Posts: 48
I just used
Code:
/path_2_file/googlemovies.pl %z
not sure it supports the others


Top
 Profile  
 
 Post subject:
PostPosted: Tue Jul 06, 2010 7:53 pm 
Offline
Joined: Mon Dec 24, 2007 9:47 am
Posts: 535
Location: Ottawa, Canada
huntermcdole wrote:
I just used
Code:
/path_2_file/googlemovies.pl %z
not sure it supports the others

This is correct, the script only expects the %z option.

Very nice find BTW, now I can actually use the movie listings! Now if only I could find a way to expand the range. "near" for me is not "near" enough. Another 10km range would be perfect.


Top
 Profile  
 

Display posts from previous:  Sort by  
Post new topic Reply to topic  [ 19 posts ] 
Go to page Previous  1, 2



All times are UTC - 6 hours




Who is online

Users browsing this forum: No registered users and 22 guests


You cannot post new topics in this forum
You cannot reply to topics in this forum
You cannot edit your posts in this forum
You cannot delete your posts in this forum
You cannot post attachments in this forum

Jump to:  
cron
Powered by phpBB® Forum Software © phpBB Group

Theme Created By ceyhansuyu