euscan: fix #435118 and #435120

Signed-off-by: Corentin Chary <corentin.chary@gmail.com>
2012-09-15 23:20:30 +02:00
parent 89f3192937
commit 9afb31fc65
4 changed files with 23 additions and 20 deletions
--- a/bin/euscan
+++ b/bin/euscan
@@ -287,9 +287,9 @@ def main():
            from os.path import basename  # To get the short name
-            output.error(
+            output.eerror(
                "The short ebuild name '%s' is ambiguous. Please specify" %
-                    basename(pkgs[0]),
+                    basename(pkgs[0]) +
                "one of the above fully-qualified ebuild names instead."
            )
            exit_helper(1)
--- a/pym/euscan/init.py
+++ b/pym/euscan/init.py
@@ -77,11 +77,12 @@ ROBOTS_TXT_BLACKLIST_DOMAINS = [
    '(.*)sourceforge(.*)',
    '(.*)github.com',
    '(.*)berlios(.*)',
-    '(.*)qt.nokia.com(.*)',
+    '(.*)qt\.nokia\.com(.*)',
-    '(.*)chromium.org(.*)',
+    '(.*)chromium\.org(.*)',
-    '(.*)nodejs.org(.*)',
+    '(.*)nodejs\.org(.*)',
-    '(.*)download.mono-project.com(.*)',
+    '(.*)download\.mono-project\.com(.*)',
-    '(.*)fedorahosted.org(.*)',
+    '(.*)fedorahosted\.org(.*)',
    '(.*)download\.tuxfamily\.org(.*)',
 ]
 from out import EuscanOutput
--- a/pym/euscan/handlers/generic.py
+++ b/pym/euscan/handlers/generic.py
@@ -70,6 +70,7 @@ def scan_html(data, url, pattern):
                 match.group(0))
            )
    return results
--- a/pym/euscan/helpers.py
+++ b/pym/euscan/helpers.py
@@ -373,27 +373,28 @@ def tryurl(fileurl, template):
 def regex_from_template(template):
    # Escape
-    template = re.escape(template)
+    regexp = re.escape(template)
    # Unescape specific stuff
-    template = template.replace('\$\{', '${')
+    regexp = regexp.replace('\$\{', '${')
-    template = template.replace('\}', '}')
+    regexp = regexp.replace('\}', '}')
-    template = template.replace('}\.$', '}.$')
+    regexp = regexp.replace('}\.$', '}.$')
    # Replace ${\d+}
-    #template = template.replace('${0}', r'([\d]+?)')
+    #regexp = regexp.replace('${0}', r'([\d]+?)')
-    template = re.sub(r'(\$\{\d+\}(\.?))+', r'([\w\.]+?)', template)
+    regexp = re.sub(r'(\$\{\d+\}(\.?))+', r'([\w\.]+?)', regexp)
-    #template = re.sub(r'(\$\{\d+\}\.?)+', r'([\w]+?)', template)
+    #regexp = re.sub(r'(\$\{\d+\}\.?)+', r'([\w]+?)', regexp)
-    #template = re.sub(r'(\$\{\d+\}\.+)+', '(.+?)\.', template)
+    #regexp = re.sub(r'(\$\{\d+\}\.+)+', '(.+?)\.', regexp)
-    #template = re.sub(r'(\$\{\d+\})+', '(.+?)', template)
+    #regexp = re.sub(r'(\$\{\d+\})+', '(.+?)', regexp)
    # Full version
-    template = template.replace('${PV}', _v)
+    regexp = regexp.replace('${PV}', _v)
    # End
-    template = template + r'/?$'
+    regexp = regexp + r'/?$'
-    return template
+
    return regexp
 def basedir_from_template(template):
@@ -417,7 +418,7 @@ def generate_scan_paths(url):
    path = prefix + ":/"
    for chunk in chunks:
        if '${' in chunk:
-            steps.append((path, regex_from_template(chunk)))
+            steps.append((path, '^(?:|.*/)'  + regex_from_template(chunk)))
            path = ""
        else:
            path += "/"