diff --git a/bin/euscan_patch_metadata b/bin/euscan_patch_metadata
index b5649ed..0de4239 100755
--- a/bin/euscan_patch_metadata
+++ b/bin/euscan_patch_metadata
@@ -11,6 +11,7 @@ import logging
import shutil
import subprocess
+from portage.exception import AmbiguousPackageName
from gentoolkit.query import Query
from BeautifulSoup import BeautifulSoup, SoupStrainer
@@ -133,40 +134,60 @@ def get_deb_url(name):
def patch_metadata(metadata_path, watch_data, diff=False):
- watch_data = "\n".join([line for line in watch_data.split("\n")
- if not line.startswith("#")]) # comments
- watch_data = watch_data.replace("\\\n", "") # remove backslashes
- watch_data = " ".join(watch_data.split()) # remove extra spaces and \n
-
- result = re.match(
- r'(version=\d+?) (?:opts=(?:"([^"]+?)"|([^\s]+?)) )?(.*)', watch_data
- )
-
- version, attrs_quote, attrs, url = result.groups()
- attrs = attrs_quote or attrs
-
- if attrs:
- attrs = [x.replace('=', '="') + '"' for x in attrs.split(",")]
- attrs = " ".join(attrs)
+ logger.info(" Patching metadata file")
with open(metadata_path) as fp:
original = fp.read()
rindent, indent = guess_indent_values(original)
-
data = original
- logger.info(" Patching metadata file")
+ # clean watch_data
+ watch_data = "\n".join([line for line in watch_data.split("\n")
+ if not line.startswith("#")]) # comments
- if attrs:
- watch_tag = '%s%s' % (indent, version, attrs, url)
- else:
- watch_tag = '%s%s' % (indent, version, url)
+ watch_data = watch_data.replace("\\\n", "") # remove backslashes
+
+ watch_tags = []
+
+ for watch_line in watch_data.split("\n"): # there can be multiple lines
+ watch_line = " ".join(watch_line.split()) # remove extra spaces and \n
+
+ version_parse = re.match("version=(\d+?)", watch_line)
+ if version_parse:
+ version = version_parse.group(1)
+ continue
+
+ if not watch_line: # skip empty lines
+ continue
+
+ # parse watch_line
+ result = re.match(
+ r'(?:opts=(?:"([^"]+?)"|([^\s]+?)) )?(.*)',
+ watch_line
+ )
+
+ attrs_quote, attrs, url = result.groups()
+ attrs = attrs_quote or attrs
+
+ if attrs:
+ attrs = [x.replace('=', '="') + '"' for x in attrs.split(",")]
+ attrs = " ".join(attrs)
+
+ if attrs:
+ watch_tag = '%s%s' % \
+ (indent, version, attrs, url)
+ else:
+ watch_tag = '%s%s' % \
+ (indent, version, url)
+ watch_tags.append(watch_tag)
+
+ watch_tags = "\n".join(watch_tags)
if '' in data:
- data = data.replace('', '\n%s' % watch_tag, 1)
+ data = data.replace('', '\n%s' % watch_tags, 1)
else:
rep = '%s\n%s\n%s\n' % \
- (rindent, watch_tag, rindent)
+ (rindent, watch_tags, rindent)
data = data.replace('', rep, 1)
if not diff:
@@ -183,14 +204,18 @@ def patch_metadata(metadata_path, watch_data, diff=False):
def process_package(query, diff=False):
- matches = Query(query).smart_find(
- in_installed=True,
- in_porttree=True,
- in_overlay=True,
- include_masked=True,
- show_progress=False,
- no_matches_fatal=False,
- )
+ try:
+ matches = Query(query).smart_find(
+ in_installed=True,
+ in_porttree=True,
+ in_overlay=True,
+ include_masked=True,
+ show_progress=False,
+ no_matches_fatal=False,
+ )
+ except AmbiguousPackageName:
+ logger.error(" Ambiguous package name")
+ return None
if len(matches) == 0:
logger.error(" Package not found")
@@ -224,7 +249,9 @@ def main():
for package in packages:
logger.info("Processing %s..." % package)
- print process_package(package, opts.diff)
+ result = process_package(package, opts.diff)
+ if result:
+ print result
if __name__ == "__main__":
main()
diff --git a/euscanwww/djeuscan/views.py b/euscanwww/djeuscan/views.py
index af7d7be..3d96cb0 100644
--- a/euscanwww/djeuscan/views.py
+++ b/euscanwww/djeuscan/views.py
@@ -250,7 +250,7 @@ def package(request, category, package):
'upstream': upstream,
'log': log,
'vlog': vlog,
- 'msg' : msg,
+ 'msg': msg,
'last_scan': last_scan,
'favourited': favourited,
'refreshed': refreshed,
diff --git a/pym/euscan/handlers/cpan.py b/pym/euscan/handlers/cpan.py
index 091c64c..a54641f 100644
--- a/pym/euscan/handlers/cpan.py
+++ b/pym/euscan/handlers/cpan.py
@@ -7,7 +7,7 @@ from euscan import helpers, output
HANDLER_NAME = "cpan"
CONFIDENCE = 100.0
-PRIORITY = 100
+PRIORITY = 90
_cpan_package_name_re = re.compile("mirror://cpan/authors/.*/([^/.]*).*")
diff --git a/pym/euscan/handlers/generic.py b/pym/euscan/handlers/generic.py
index a9a3048..831dfce 100644
--- a/pym/euscan/handlers/generic.py
+++ b/pym/euscan/handlers/generic.py
@@ -1,3 +1,4 @@
+from urlparse import urljoin
import urllib2
import re
import StringIO
@@ -34,8 +35,7 @@ def scan_html(data, url, pattern):
match = re.match(pattern, href, re.I)
if match:
- results.append((match.group(1), match.group(0)))
-
+ results.append((".".join(match.groups()), match.group(0)))
return results
@@ -47,7 +47,7 @@ def scan_ftp(data, url, pattern):
line = line.replace("\n", "").replace("\r", "")
match = re.search(pattern, line, re.I)
if match:
- results.append((match.group(1), match.group(0)))
+ results.append((".".join(match.groups()), match.group(0)))
return results
@@ -77,7 +77,7 @@ def scan_directory_recursive(cp, ver, rev, url, steps, orig_url):
results = []
- if re.search("<\s*a\s+[^>]*href", data):
+ if re.search("<\s*a\s+[^>]*href", data, re.I):
results.extend(scan_html(data, url, pattern))
elif url.startswith('ftp://'):
results.extend(scan_ftp(data, url, pattern))
@@ -88,11 +88,7 @@ def scan_directory_recursive(cp, ver, rev, url, steps, orig_url):
pv = helpers.gentoo_mangle_version(up_pv)
if helpers.version_filtered(cp, ver, pv):
continue
-
- if not url.endswith('/') and not path.startswith('/'):
- path = url + '/' + path
- else:
- path = url + path
+ path = urljoin(url, path)
if not steps and path not in orig_url:
versions.append((path, pv, HANDLER_NAME, CONFIDENCE))
diff --git a/pym/euscan/handlers/github.py b/pym/euscan/handlers/github.py
index 76c50c9..9bb5596 100644
--- a/pym/euscan/handlers/github.py
+++ b/pym/euscan/handlers/github.py
@@ -8,7 +8,7 @@ from euscan import helpers, output
HANDLER_NAME = "github"
CONFIDENCE = 100.0
-PRIORITY = 100
+PRIORITY = 90
def can_handle(pkg, url):
diff --git a/pym/euscan/handlers/kde.py b/pym/euscan/handlers/kde.py
index 1dcead6..21722bb 100644
--- a/pym/euscan/handlers/kde.py
+++ b/pym/euscan/handlers/kde.py
@@ -1,6 +1,6 @@
from euscan.handlers import generic
-PRIORITY = 100
+PRIORITY = 90
HANDLER_NAME = "kde"
@@ -14,7 +14,7 @@ def can_handle(pkg, url):
def clean_results(results):
ret = []
- for path, version, confidence in results:
+ for path, version, _, confidence in results:
if version == '5SUMS':
continue
ret.append((path, version, HANDLER_NAME, confidence))
diff --git a/pym/euscan/handlers/php.py b/pym/euscan/handlers/php.py
index 36bf3d7..6b74ff6 100644
--- a/pym/euscan/handlers/php.py
+++ b/pym/euscan/handlers/php.py
@@ -7,7 +7,7 @@ from euscan import helpers, output
HANDLER_NAME = "php"
CONFIDENCE = 100.0
-PRIORITY = 100
+PRIORITY = 90
def can_handle(pkg, url):
diff --git a/pym/euscan/handlers/pypi.py b/pym/euscan/handlers/pypi.py
index 74aaeb3..9cd1620 100644
--- a/pym/euscan/handlers/pypi.py
+++ b/pym/euscan/handlers/pypi.py
@@ -7,7 +7,7 @@ from euscan import helpers, output
HANDLER_NAME = "pypi"
CONFIDENCE = 100.0
-PRIORITY = 100
+PRIORITY = 90
def can_handle(pkg, url):
diff --git a/pym/euscan/handlers/rubygem.py b/pym/euscan/handlers/rubygem.py
index 529e6d4..39e2334 100644
--- a/pym/euscan/handlers/rubygem.py
+++ b/pym/euscan/handlers/rubygem.py
@@ -7,7 +7,7 @@ from euscan import helpers, output
HANDLER_NAME = "rubygem"
CONFIDENCE = 100.0
-PRIORITY = 100
+PRIORITY = 90
def can_handle(pkg, url):
diff --git a/pym/euscan/handlers/watch.py b/pym/euscan/handlers/watch.py
new file mode 100644
index 0000000..d172072
--- /dev/null
+++ b/pym/euscan/handlers/watch.py
@@ -0,0 +1,176 @@
+import re
+import urllib2
+
+import portage
+
+from euscan.handlers import generic
+from euscan import output, helpers
+
+PRIORITY = 100
+
+HANDLER_NAME = "watch"
+CONFIDENCE = 100.0
+
+
+# a regex group that does not span several path components
+is_pattern = r"\([^\/]+\)"
+
+
+def can_handle(pkg, url):
+    """Tell whether the package metadata holds an upstream watch tag."""
+    try:
+        return pkg.metadata._xml_tree.find("upstream").find("watch") \
+            is not None
+    except AttributeError:
+        # a link of the lookup chain is missing (e.g. no upstream tag)
+        return False
+
+
+def parse_mangles(mangles, string):
+    """
+    Apply perl style s/pattern/replacement/ rules to string, in order.
+
+    Rules that are not written in that form, or whose pattern python
+    cannot compile, are skipped instead of raising.
+    """
+    for mangle in mangles:
+        # convert regex from perl format to python format
+        m = re.match(r"s/(.*[^\\])/(.*)/", mangle)
+        if not m:
+            continue
+        pattern, repl = m.groups()
+        # perl back-references ($1) become python ones (\1)
+        repl = re.sub(r"\$(\d+)", r"\\\1", repl)
+        try:
+            string = re.sub(pattern, repl, string)
+        except re.error:
+            continue
+    return string
+
+
+def clean_results(results, versionmangle, urlmangle):
+    """
+    Run the mangle rules over the version and path of every result and
+    tag the results with this handler's name and confidence.
+    """
+    ret = []
+
+    for path, version, _, _ in results:
+        version = parse_mangles(versionmangle, version)
+        path = parse_mangles(urlmangle, path)
+        ret.append((path, version, HANDLER_NAME, CONFIDENCE))
+
+    return ret
+
+
+def parse_watch(pkg):
+    """
+    Yield a (base, file_pattern, versionmangle, urlmangle) tuple for each
+    watch tag of the package metadata. Tags without text are skipped.
+    """
+    for watch_tag in pkg.metadata._xml_tree.find("upstream").findall("watch"):
+        # split on any whitespace: padding or line breaks inside the tag
+        # must not end up in the url
+        parts = (watch_tag.text or "").split()
+        if not parts:
+            continue
+        base = parts[0]
+        file_pattern = parts[1] if len(parts) > 1 else None
+
+        # the file pattern can be in the base url
+        pattern_regex = r"/([^/]*\([^/]*\)[^/]*)$"
+        match = re.search(pattern_regex, base)
+        if match:
+            file_pattern = match.group(1)
+            # cut the last component only, not every occurrence of it
+            base = base[:match.start(1)]
+
+        # handle sf.net specially
+        base = base.replace(
+            "http://sf.net/", "http://qa.debian.org/watch/sf.php/"
+        )
+
+        vmangle = watch_tag.attrib.get("uversionmangle", None) or \
+            watch_tag.attrib.get("versionmangle", None)
+        versionmangle = vmangle.split(";") if vmangle else []
+
+        umangle = watch_tag.attrib.get("downloadurlmangle", None)
+        urlmangle = umangle.split(";") if umangle else []
+
+        yield (base, file_pattern, versionmangle, urlmangle)
+
+
+def handle_directory_patterns(base, file_pattern):
+    r"""
+    Directory pattern matching
+    e.g.: base: ftp://ftp.nessus.org/pub/nessus/nessus-([\d\.]+)/src/
+          file_pattern: nessus-core-([\d\.]+)\.tar\.gz
+
+    Returns one (url, file_pattern) step per match of the first pattern
+    component of base in the data fetched from the url in front of it,
+    [] when that url cannot be opened or base holds no pattern.
+    """
+    splitted = base.split("/")
+    for i, elem in enumerate(splitted):
+        if re.search(is_pattern, elem):
+            break
+    else:
+        # no component holds a pattern: nothing to expand
+        return []
+
+    basedir = "/".join(splitted[:i])
+    directory_pattern = splitted[i]
+    final = "/".join(splitted[i + 1:])
+
+    try:
+        fp = helpers.urlopen(basedir)
+    except (urllib2.URLError, IOError):
+        return []
+
+    if not fp:
+        return []
+
+    data = fp.read()
+
+    if basedir.startswith("ftp://"):
+        scan_data = generic.scan_ftp(data, basedir, directory_pattern)
+    else:
+        scan_data = generic.scan_html(data, basedir, directory_pattern)
+
+    # only the matched text of each scan result is needed here
+    return [("/".join((basedir, path, final)), file_pattern)
+            for _, path in scan_data]
+
+
+def scan(pkg, url):
+    """
+    Scan the locations described by the watch tags of the package.
+
+    Returns a list of (path, version, HANDLER_NAME, CONFIDENCE) tuples.
+    """
+    output.einfo("Using watch data")
+
+    cp, ver, rev = portage.pkgsplit(pkg.cpv)
+
+    results = []
+    for base, file_pattern, versionmangle, urlmangle in parse_watch(pkg):
+        if not re.search(is_pattern, base):
+            steps = [(base, file_pattern)]
+            res = generic.scan_directory_recursive(
+                cp, ver, rev, "", steps, url
+            )
+        else:
+            # expand the directory patterns first, then scan each result
+            res = []
+            for step in handle_directory_patterns(base, file_pattern):
+                res += generic.scan_directory_recursive(
+                    cp, ver, rev, "", [step], url
+                )
+
+        results += clean_results(res, versionmangle, urlmangle)
+    return results
+
+
+def brute_force(pkg, url):
+    """Brute force is not implemented for this handler."""
+    return []
diff --git a/pym/euscan/helpers.py b/pym/euscan/helpers.py
index 1e385cd..6582393 100644
--- a/pym/euscan/helpers.py
+++ b/pym/euscan/helpers.py
@@ -33,6 +33,7 @@ VERSION_CMP_PACKAGE_QUIRKS = {
_v_end = '((-|_)(pre|p|beta|b|alpha|a|rc|r)\d*)'
_v = r'((\d+)((\.\d+)*)([a-zA-Z]*?)(' + _v_end + '*))'
+
# Stolen from g-pypi
def gentoo_mangle_version(up_pv):
"""Convert PV to MY_PV if needed
@@ -537,6 +538,7 @@ def generate_scan_paths(url):
return steps
+
def parse_mirror(uri):
from random import shuffle