euscan: fix some robots.txt issues

- disable checks for ftp
- fail silently
- use einfo and not eerror

Signed-off-by: Corentin Chary <corentincj@iksaif.net>
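For context, urlallowed() ends up looking roughly like the sketch below after this change (Python 2, matching euscan's use of urlparse/robotparser). It is reconstructed from the hunks that follow: the rpcache lookup and the robots.txt URL construction are not part of this diff and are assumed from context, and CONFIG/rpcache are stubbed only so the snippet stands on its own.

# Rough sketch of urlallowed() after this commit, reconstructed from the
# hunks below; not the literal euscan helpers module.
import urlparse
import robotparser

CONFIG = {'user-agent': 'euscan'}   # assumed shape of euscan's CONFIG dict
rpcache = {}                        # per-site RobotFileParser cache

def urlallowed(url):
    protocol, domain = urlparse.urlparse(url)[:2]

    # robots.txt is an HTTP convention, so ftp URLs are always allowed
    # (the old "'protocol' == 'ftp'" literal comparison never matched).
    if protocol == 'ftp':
        return True

    baseurl = '%s://%s' % (protocol, domain)
    robotsurl = urlparse.urljoin(baseurl, 'robots.txt')  # assumed helper logic

    if baseurl in rpcache:
        rp = rpcache[baseurl]
    else:
        rp = robotparser.RobotFileParser()
        rp.set_url(robotsurl)
        # Fail silently: if robots.txt cannot be fetched or parsed, allow
        # the URL instead of letting the exception abort the scan.
        try:
            rp.read()
            rpcache[baseurl] = rp
        except:
            return True

    return rp.can_fetch(CONFIG['user-agent'], url)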
@@ -217,7 +217,7 @@ def urlallowed(url):
 
     protocol, domain = urlparse.urlparse(url)[:2]
 
-    if 'protocol' == 'ftp':
+    if protocol == 'ftp':
         return True
 
     baseurl = '%s://%s' % (protocol, domain)
@@ -228,8 +228,11 @@ def urlallowed(url):
     else:
         rp = robotparser.RobotFileParser()
         rp.set_url(robotsurl)
-        rp.read()
-        rpcache[baseurl] = rp
+        try:
+            rp.read()
+            rpcache[baseurl] = rp
+        except:
+            return True
     return rp.can_fetch(CONFIG['user-agent'], url)
 
 def urlopen(url, timeout=None, verb="GET"):
@@ -253,7 +256,7 @@ def tryurl(fileurl, template):
     result = True
 
     if not urlallowed(fileurl):
-        output.eerror("Url '%s' blocked by robots.txt" % fileurl)
+        output.einfo("Url '%s' blocked by robots.txt" % fileurl)
         return None
 
     output.ebegin("Trying: " + fileurl)
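Taken together, the net effect for callers such as tryurl() is roughly the following (illustrative only, using the hypothetical stubs from the sketch above):

print urlallowed('ftp://ftp.example.org/pub/foo-1.0.tar.gz')
# -> True: ftp URLs bypass robots.txt entirely.

# For HTTP(S) URLs, a robots.txt that cannot be fetched no longer raises; the
# URL is simply allowed.  A URL that robots.txt does block is now reported via
# output.einfo() instead of output.eerror(), and tryurl() returns None for it.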