From 9da62b211b95f087ca7100abdffc2eda85c0d54a Mon Sep 17 00:00:00 2001
From: Corentin Chary
Date: Sat, 10 Sep 2011 08:23:46 +0200
Subject: [PATCH] euscan: fix some robots.txt issues

- disable checks for ftp
- fail silently
- use einfo and not eerror

Signed-off-by: Corentin Chary
---
 pym/euscan/helpers.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/pym/euscan/helpers.py b/pym/euscan/helpers.py
index ef7f4c8..5285fde 100644
--- a/pym/euscan/helpers.py
+++ b/pym/euscan/helpers.py
@@ -217,7 +217,7 @@ def urlallowed(url):
 
     protocol, domain = urlparse.urlparse(url)[:2]
 
-    if 'protocol' == 'ftp':
+    if protocol == 'ftp':
         return True
 
     baseurl = '%s://%s' % (protocol, domain)
@@ -228,8 +228,11 @@ def urlallowed(url):
     else:
         rp = robotparser.RobotFileParser()
         rp.set_url(robotsurl)
-        rp.read()
-        rpcache[baseurl] = rp
+        try:
+            rp.read()
+            rpcache[baseurl] = rp
+        except:
+            return True
     return rp.can_fetch(CONFIG['user-agent'], url)
 
 def urlopen(url, timeout=None, verb="GET"):
@@ -253,7 +256,7 @@ def tryurl(fileurl, template):
     result = True
 
     if not urlallowed(fileurl):
-        output.eerror("Url '%s' blocked by robots.txt" % fileurl)
+        output.einfo("Url '%s' blocked by robots.txt" % fileurl)
         return None
 
     output.ebegin("Trying: " + fileurl)
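
Note (not part of the patch): for reference, below is a self-contained sketch of what urlallowed() looks like with these changes applied. It is a minimal Python 3 rendition -- euscan itself targets Python 2, where the modules are urlparse and robotparser -- and the user_agent default and the _rpcache name are illustrative, not euscan's own.

# Sketch of the patched urlallowed() behaviour (Python 3 rendition;
# the original uses Python 2's urlparse/robotparser and a CONFIG dict).
import urllib.parse
import urllib.robotparser

# Per-site cache of parsed robots.txt files, one entry per scheme://host.
_rpcache = {}

def urlallowed(url, user_agent='euscan'):
    protocol, domain = urllib.parse.urlparse(url)[:2]

    # robots.txt is an HTTP convention, so FTP URLs skip the check entirely.
    if protocol == 'ftp':
        return True

    baseurl = '%s://%s' % (protocol, domain)
    robotsurl = urllib.parse.urljoin(baseurl, 'robots.txt')

    if baseurl in _rpcache:
        rp = _rpcache[baseurl]
    else:
        rp = urllib.robotparser.RobotFileParser()
        rp.set_url(robotsurl)
        try:
            rp.read()
            _rpcache[baseurl] = rp
        except Exception:
            # Fail silently: if robots.txt cannot be fetched, treat the
            # URL as allowed instead of aborting the check.
            return True
    return rp.can_fetch(user_agent, url)

The actual patch uses a bare except rather than except Exception, but the design choice is the same either way: an unreachable or malformed robots.txt should not abort the scan, so the URL is optimistically treated as allowed, and the failed parser is not cached so the site will be retried later.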