diff --git a/bin/euscan_patch_metadata b/bin/euscan_patch_metadata index b5649ed..0de4239 100755 --- a/bin/euscan_patch_metadata +++ b/bin/euscan_patch_metadata @@ -11,6 +11,7 @@ import logging import shutil import subprocess +from portage.exception import AmbiguousPackageName from gentoolkit.query import Query from BeautifulSoup import BeautifulSoup, SoupStrainer @@ -133,40 +134,60 @@ def get_deb_url(name): def patch_metadata(metadata_path, watch_data, diff=False): - watch_data = "\n".join([line for line in watch_data.split("\n") - if not line.startswith("#")]) # comments - watch_data = watch_data.replace("\\\n", "") # remove backslashes - watch_data = " ".join(watch_data.split()) # remove extra spaces and \n - - result = re.match( - r'(version=\d+?) (?:opts=(?:"([^"]+?)"|([^\s]+?)) )?(.*)', watch_data - ) - - version, attrs_quote, attrs, url = result.groups() - attrs = attrs_quote or attrs - - if attrs: - attrs = [x.replace('=', '="') + '"' for x in attrs.split(",")] - attrs = " ".join(attrs) + logger.info(" Patching metadata file") with open(metadata_path) as fp: original = fp.read() rindent, indent = guess_indent_values(original) - data = original - logger.info(" Patching metadata file") + # clean watch_data + watch_data = "\n".join([line for line in watch_data.split("\n") + if not line.startswith("#")]) # comments - if attrs: - watch_tag = '%s%s' % (indent, version, attrs, url) - else: - watch_tag = '%s%s' % (indent, version, url) + watch_data = watch_data.replace("\\\n", "") # remove backslashes + + watch_tags = [] + + for watch_line in watch_data.split("\n"): # there can be multiple lines + watch_line = " ".join(watch_line.split()) # remove extra spaces and \n + + version_parse = re.match("version=(\d+?)", watch_line) + if version_parse: + version = version_parse.group(1) + continue + + if not watch_line: # skip empty lines + continue + + # parse watch_line + result = re.match( + r'(?:opts=(?:"([^"]+?)"|([^\s]+?)) )?(.*)', + watch_line + ) + + attrs_quote, attrs, url = result.groups() + attrs = attrs_quote or attrs + + if attrs: + attrs = [x.replace('=', '="') + '"' for x in attrs.split(",")] + attrs = " ".join(attrs) + + if attrs: + watch_tag = '%s%s' % \ + (indent, version, attrs, url) + else: + watch_tag = '%s%s' % \ + (indent, version, url) + watch_tags.append(watch_tag) + + watch_tags = "\n".join(watch_tags) if '' in data: - data = data.replace('', '\n%s' % watch_tag, 1) + data = data.replace('', '\n%s' % watch_tags, 1) else: rep = '%s\n%s\n%s\n' % \ - (rindent, watch_tag, rindent) + (rindent, watch_tags, rindent) data = data.replace('', rep, 1) if not diff: @@ -183,14 +204,18 @@ def patch_metadata(metadata_path, watch_data, diff=False): def process_package(query, diff=False): - matches = Query(query).smart_find( - in_installed=True, - in_porttree=True, - in_overlay=True, - include_masked=True, - show_progress=False, - no_matches_fatal=False, - ) + try: + matches = Query(query).smart_find( + in_installed=True, + in_porttree=True, + in_overlay=True, + include_masked=True, + show_progress=False, + no_matches_fatal=False, + ) + except AmbiguousPackageName: + logger.error(" Ambiguous package name") + return None if len(matches) == 0: logger.error(" Package not found") @@ -224,7 +249,9 @@ def main(): for package in packages: logger.info("Processing %s..." % package) - print process_package(package, opts.diff) + result = process_package(package, opts.diff) + if result: + print result if __name__ == "__main__": main() diff --git a/euscanwww/djeuscan/views.py b/euscanwww/djeuscan/views.py index af7d7be..3d96cb0 100644 --- a/euscanwww/djeuscan/views.py +++ b/euscanwww/djeuscan/views.py @@ -250,7 +250,7 @@ def package(request, category, package): 'upstream': upstream, 'log': log, 'vlog': vlog, - 'msg' : msg, + 'msg': msg, 'last_scan': last_scan, 'favourited': favourited, 'refreshed': refreshed, diff --git a/pym/euscan/handlers/cpan.py b/pym/euscan/handlers/cpan.py index 091c64c..a54641f 100644 --- a/pym/euscan/handlers/cpan.py +++ b/pym/euscan/handlers/cpan.py @@ -7,7 +7,7 @@ from euscan import helpers, output HANDLER_NAME = "cpan" CONFIDENCE = 100.0 -PRIORITY = 100 +PRIORITY = 90 _cpan_package_name_re = re.compile("mirror://cpan/authors/.*/([^/.]*).*") diff --git a/pym/euscan/handlers/generic.py b/pym/euscan/handlers/generic.py index a9a3048..831dfce 100644 --- a/pym/euscan/handlers/generic.py +++ b/pym/euscan/handlers/generic.py @@ -1,3 +1,4 @@ +from urlparse import urljoin import urllib2 import re import StringIO @@ -34,8 +35,7 @@ def scan_html(data, url, pattern): match = re.match(pattern, href, re.I) if match: - results.append((match.group(1), match.group(0))) - + results.append((".".join(match.groups()), match.group(0))) return results @@ -47,7 +47,7 @@ def scan_ftp(data, url, pattern): line = line.replace("\n", "").replace("\r", "") match = re.search(pattern, line, re.I) if match: - results.append((match.group(1), match.group(0))) + results.append((".".join(match.groups()), match.group(0))) return results @@ -77,7 +77,7 @@ def scan_directory_recursive(cp, ver, rev, url, steps, orig_url): results = [] - if re.search("<\s*a\s+[^>]*href", data): + if re.search("<\s*a\s+[^>]*href", data, re.I): results.extend(scan_html(data, url, pattern)) elif url.startswith('ftp://'): results.extend(scan_ftp(data, url, pattern)) @@ -88,11 +88,7 @@ def scan_directory_recursive(cp, ver, rev, url, steps, orig_url): pv = helpers.gentoo_mangle_version(up_pv) if helpers.version_filtered(cp, ver, pv): continue - - if not url.endswith('/') and not path.startswith('/'): - path = url + '/' + path - else: - path = url + path + path = urljoin(url, path) if not steps and path not in orig_url: versions.append((path, pv, HANDLER_NAME, CONFIDENCE)) diff --git a/pym/euscan/handlers/github.py b/pym/euscan/handlers/github.py index 76c50c9..9bb5596 100644 --- a/pym/euscan/handlers/github.py +++ b/pym/euscan/handlers/github.py @@ -8,7 +8,7 @@ from euscan import helpers, output HANDLER_NAME = "github" CONFIDENCE = 100.0 -PRIORITY = 100 +PRIORITY = 90 def can_handle(pkg, url): diff --git a/pym/euscan/handlers/kde.py b/pym/euscan/handlers/kde.py index 1dcead6..21722bb 100644 --- a/pym/euscan/handlers/kde.py +++ b/pym/euscan/handlers/kde.py @@ -1,6 +1,6 @@ from euscan.handlers import generic -PRIORITY = 100 +PRIORITY = 90 HANDLER_NAME = "kde" @@ -14,7 +14,7 @@ def can_handle(pkg, url): def clean_results(results): ret = [] - for path, version, confidence in results: + for path, version, _, confidence in results: if version == '5SUMS': continue ret.append((path, version, HANDLER_NAME, confidence)) diff --git a/pym/euscan/handlers/php.py b/pym/euscan/handlers/php.py index 36bf3d7..6b74ff6 100644 --- a/pym/euscan/handlers/php.py +++ b/pym/euscan/handlers/php.py @@ -7,7 +7,7 @@ from euscan import helpers, output HANDLER_NAME = "php" CONFIDENCE = 100.0 -PRIORITY = 100 +PRIORITY = 90 def can_handle(pkg, url): diff --git a/pym/euscan/handlers/pypi.py b/pym/euscan/handlers/pypi.py index 74aaeb3..9cd1620 100644 --- a/pym/euscan/handlers/pypi.py +++ b/pym/euscan/handlers/pypi.py @@ -7,7 +7,7 @@ from euscan import helpers, output HANDLER_NAME = "pypi" CONFIDENCE = 100.0 -PRIORITY = 100 +PRIORITY = 90 def can_handle(pkg, url): diff --git a/pym/euscan/handlers/rubygem.py b/pym/euscan/handlers/rubygem.py index 529e6d4..39e2334 100644 --- a/pym/euscan/handlers/rubygem.py +++ b/pym/euscan/handlers/rubygem.py @@ -7,7 +7,7 @@ from euscan import helpers, output HANDLER_NAME = "rubygem" CONFIDENCE = 100.0 -PRIORITY = 100 +PRIORITY = 90 def can_handle(pkg, url): diff --git a/pym/euscan/handlers/watch.py b/pym/euscan/handlers/watch.py new file mode 100644 index 0000000..d172072 --- /dev/null +++ b/pym/euscan/handlers/watch.py @@ -0,0 +1,139 @@ +import re +import urllib2 + +import portage + +from euscan.handlers import generic +from euscan import output, helpers + +PRIORITY = 100 + +HANDLER_NAME = "watch" +CONFIDENCE = 100.0 + + +is_pattern = r"\([^\/]+\)" + + +def can_handle(pkg, url): + try: + return pkg.metadata._xml_tree.find("upstream").find("watch") \ + is not None + except AttributeError: + return False + + +def parse_mangles(mangles, string): + for mangle in mangles: + # convert regex from perl format to python format + m = re.match(r"s/(.*[^\\])/(.*)/", mangle) + pattern, repl = m.groups() + repl = re.sub(r"\$(\d+)", r"\\\1", repl) + string = re.sub(pattern, repl, string) + return string + + +def clean_results(results, versionmangle, urlmangle): + ret = [] + + for path, version, _, _ in results: + version = parse_mangles(versionmangle, version) + path = parse_mangles(urlmangle, path) + ret.append((path, version, HANDLER_NAME, CONFIDENCE)) + + return ret + + +def parse_watch(pkg): + for watch_tag in pkg.metadata._xml_tree.find("upstream").findall("watch"): + try: + base, file_pattern = watch_tag.text.split(" ")[:2] + except ValueError: + base, file_pattern = watch_tag.text, None + + # the file pattern can be in the base url + pattern_regex = r"/([^/]*\([^/]*\)[^/]*)$" + match = re.search(pattern_regex, base) + if match: + file_pattern = match.group(1) + base = base.replace(file_pattern, "") + + # handle sf.net specially + base = base.replace( + "http://sf.net/", "http://qa.debian.org/watch/sf.php/" + ) + + vmangle = watch_tag.attrib.get("uversionmangle", None) or \ + watch_tag.attrib.get("versionmangle", None) + versionmangle = vmangle.split(";") if vmangle else [] + + umangle = watch_tag.attrib.get("downloadurlmangle", None) + urlmangle = umangle.split(";") if umangle else [] + + yield (base, file_pattern, versionmangle, urlmangle) + + +def handle_directory_patterns(base, file_pattern): + """ + Directory pattern matching + e.g.: base: ftp://ftp.nessus.org/pub/nessus/nessus-([\d\.]+)/src/ + file_pattern: nessus-core-([\d\.]+)\.tar\.gz + """ + splitted = base.split("/") + i = 0 + basedir = [] + for elem in splitted: + if re.search(is_pattern, elem): + break + basedir.append(elem) + i += 1 + basedir = "/".join(basedir) + directory_pattern = splitted[i] + final = "/".join(splitted[i + 1:]) + + try: + fp = helpers.urlopen(basedir) + except urllib2.URLError: + return [] + except IOError: + return [] + + if not fp: + return [] + + data = fp.read() + + if basedir.startswith("ftp://"): + scan_data = generic.scan_ftp(data, basedir, directory_pattern) + else: + scan_data = generic.scan_html(data, basedir, directory_pattern) + + return [("/".join((basedir, path, final)), file_pattern) + for _, path in scan_data] + + +def scan(pkg, url): + output.einfo("Using watch data") + + cp, ver, rev = portage.pkgsplit(pkg.cpv) + + results = [] + for base, file_pattern, versionmangle, urlmangle in parse_watch(pkg): + if not re.search(is_pattern, base): + steps = [(base, file_pattern)] + res = generic.scan_directory_recursive( + cp, ver, rev, "", steps, url + ) + else: + res = [] + for step in handle_directory_patterns(base, file_pattern): + res += generic.scan_directory_recursive( + cp, ver, rev, "", [step], url + ) + + results += clean_results(res, versionmangle, urlmangle) + return results + + +def brute_force(pkg, url): + return [] diff --git a/pym/euscan/helpers.py b/pym/euscan/helpers.py index 1e385cd..6582393 100644 --- a/pym/euscan/helpers.py +++ b/pym/euscan/helpers.py @@ -33,6 +33,7 @@ VERSION_CMP_PACKAGE_QUIRKS = { _v_end = '((-|_)(pre|p|beta|b|alpha|a|rc|r)\d*)' _v = r'((\d+)((\.\d+)*)([a-zA-Z]*?)(' + _v_end + '*))' + # Stolen from g-pypi def gentoo_mangle_version(up_pv): """Convert PV to MY_PV if needed @@ -537,6 +538,7 @@ def generate_scan_paths(url): return steps + def parse_mirror(uri): from random import shuffle