euscan: Added watch handler, fixed generic one

Signed-off-by: volpino <fox91@anche.no>
Authored by volpino on 2012-07-24 15:02:36 +02:00
Committed by Corentin Chary
parent 795dcb1414
commit e2890c6364
11 changed files with 213 additions and 49 deletions

@@ -7,7 +7,7 @@ from euscan import helpers, output
HANDLER_NAME = "cpan"
CONFIDENCE = 100.0
-PRIORITY = 100
+PRIORITY = 90
_cpan_package_name_re = re.compile("mirror://cpan/authors/.*/([^/.]*).*")

@@ -1,3 +1,4 @@
+from urlparse import urljoin
import urllib2
import re
import StringIO
@@ -34,8 +35,7 @@ def scan_html(data, url, pattern):
        match = re.match(pattern, href, re.I)
        if match:
-            results.append((match.group(1), match.group(0)))
+            results.append((".".join(match.groups()), match.group(0)))
    return results
@@ -47,7 +47,7 @@ def scan_ftp(data, url, pattern):
        line = line.replace("\n", "").replace("\r", "")
        match = re.search(pattern, line, re.I)
        if match:
-            results.append((match.group(1), match.group(0)))
+            results.append((".".join(match.groups()), match.group(0)))
    return results
@@ -77,7 +77,7 @@ def scan_directory_recursive(cp, ver, rev, url, steps, orig_url):
    results = []
-    if re.search("<\s*a\s+[^>]*href", data):
+    if re.search("<\s*a\s+[^>]*href", data, re.I):
        results.extend(scan_html(data, url, pattern))
    elif url.startswith('ftp://'):
        results.extend(scan_ftp(data, url, pattern))
@@ -88,11 +88,7 @@ def scan_directory_recursive(cp, ver, rev, url, steps, orig_url):
        pv = helpers.gentoo_mangle_version(up_pv)
        if helpers.version_filtered(cp, ver, pv):
            continue
-        if not url.endswith('/') and not path.startswith('/'):
-            path = url + '/' + path
-        else:
-            path = url + path
+        path = urljoin(url, path)
        if not steps and path not in orig_url:
            versions.append((path, pv, HANDLER_NAME, CONFIDENCE))
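
Note (added for illustration, not part of the commit): the removed string
concatenation above did not handle hrefs that are absolute paths or full URLs
the way a browser would; urlparse.urljoin covers those cases uniformly. The
host names below are made up:

    from urlparse import urljoin

    urljoin("http://example.com/pub/", "foo-1.0.tar.gz")
    # -> 'http://example.com/pub/foo-1.0.tar.gz'
    urljoin("http://example.com/pub/", "/dl/foo-1.0.tar.gz")
    # -> 'http://example.com/dl/foo-1.0.tar.gz'
    urljoin("http://example.com/pub/", "http://mirror.example.org/foo-1.0.tar.gz")
    # -> 'http://mirror.example.org/foo-1.0.tar.gz'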

@@ -8,7 +8,7 @@ from euscan import helpers, output
HANDLER_NAME = "github"
CONFIDENCE = 100.0
-PRIORITY = 100
+PRIORITY = 90
def can_handle(pkg, url):

@@ -1,6 +1,6 @@
from euscan.handlers import generic
-PRIORITY = 100
+PRIORITY = 90
HANDLER_NAME = "kde"
@@ -14,7 +14,7 @@ def can_handle(pkg, url):
def clean_results(results):
    ret = []
-    for path, version, confidence in results:
+    for path, version, _, confidence in results:
        if version == '5SUMS':
            continue
        ret.append((path, version, HANDLER_NAME, confidence))

@@ -7,7 +7,7 @@ from euscan import helpers, output
HANDLER_NAME = "php"
CONFIDENCE = 100.0
-PRIORITY = 100
+PRIORITY = 90
def can_handle(pkg, url):

@@ -7,7 +7,7 @@ from euscan import helpers, output
HANDLER_NAME = "pypi"
CONFIDENCE = 100.0
-PRIORITY = 100
+PRIORITY = 90
def can_handle(pkg, url):

@@ -7,7 +7,7 @@ from euscan import helpers, output
HANDLER_NAME = "rubygem"
CONFIDENCE = 100.0
-PRIORITY = 100
+PRIORITY = 90
def can_handle(pkg, url):

@@ -0,0 +1,139 @@
import re
import urllib2

import portage

from euscan.handlers import generic
from euscan import output, helpers

PRIORITY = 100

HANDLER_NAME = "watch"
CONFIDENCE = 100.0

is_pattern = r"\([^\/]+\)"


def can_handle(pkg, url):
    try:
        return pkg.metadata._xml_tree.find("upstream").find("watch") \
            is not None
    except AttributeError:
        return False


def parse_mangles(mangles, string):
    for mangle in mangles:
        # convert regex from perl format to python format
        m = re.match(r"s/(.*[^\\])/(.*)/", mangle)
        pattern, repl = m.groups()
        repl = re.sub(r"\$(\d+)", r"\\\1", repl)
        string = re.sub(pattern, repl, string)
    return string


def clean_results(results, versionmangle, urlmangle):
    ret = []
    for path, version, _, _ in results:
        version = parse_mangles(versionmangle, version)
        path = parse_mangles(urlmangle, path)
        ret.append((path, version, HANDLER_NAME, CONFIDENCE))
    return ret


def parse_watch(pkg):
    for watch_tag in pkg.metadata._xml_tree.find("upstream").findall("watch"):
        try:
            base, file_pattern = watch_tag.text.split(" ")[:2]
        except ValueError:
            base, file_pattern = watch_tag.text, None

        # the file pattern can be in the base url
        pattern_regex = r"/([^/]*\([^/]*\)[^/]*)$"
        match = re.search(pattern_regex, base)
        if match:
            file_pattern = match.group(1)
            base = base.replace(file_pattern, "")

        # handle sf.net specially
        base = base.replace(
            "http://sf.net/", "http://qa.debian.org/watch/sf.php/"
        )

        vmangle = watch_tag.attrib.get("uversionmangle", None) or \
            watch_tag.attrib.get("versionmangle", None)
        versionmangle = vmangle.split(";") if vmangle else []

        umangle = watch_tag.attrib.get("downloadurlmangle", None)
        urlmangle = umangle.split(";") if umangle else []

        yield (base, file_pattern, versionmangle, urlmangle)


def handle_directory_patterns(base, file_pattern):
    """
    Directory pattern matching
    e.g.: base: ftp://ftp.nessus.org/pub/nessus/nessus-([\d\.]+)/src/
          file_pattern: nessus-core-([\d\.]+)\.tar\.gz
    """
    splitted = base.split("/")
    i = 0
    basedir = []
    for elem in splitted:
        if re.search(is_pattern, elem):
            break
        basedir.append(elem)
        i += 1
    basedir = "/".join(basedir)
    directory_pattern = splitted[i]
    final = "/".join(splitted[i + 1:])

    try:
        fp = helpers.urlopen(basedir)
    except urllib2.URLError:
        return []
    except IOError:
        return []

    if not fp:
        return []

    data = fp.read()

    if basedir.startswith("ftp://"):
        scan_data = generic.scan_ftp(data, basedir, directory_pattern)
    else:
        scan_data = generic.scan_html(data, basedir, directory_pattern)

    return [("/".join((basedir, path, final)), file_pattern)
            for _, path in scan_data]


def scan(pkg, url):
    output.einfo("Using watch data")

    cp, ver, rev = portage.pkgsplit(pkg.cpv)

    results = []
    for base, file_pattern, versionmangle, urlmangle in parse_watch(pkg):
        if not re.search(is_pattern, base):
            steps = [(base, file_pattern)]
            res = generic.scan_directory_recursive(
                cp, ver, rev, "", steps, url
            )
        else:
            res = []
            for step in handle_directory_patterns(base, file_pattern):
                res += generic.scan_directory_recursive(
                    cp, ver, rev, "", [step], url
                )

        results += clean_results(res, versionmangle, urlmangle)
    return results


def brute_force(pkg, url):
    return []
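
Note (added for illustration, not part of the commit): parse_mangles() above
translates Debian watch-file s/// substitutions into re.sub() calls. Assuming
that function is in scope, made-up mangle rules behave like this:

    parse_mangles(["s/_/./"], "1_2_3")             # -> '1.2.3'
    parse_mangles(["s/-src$//"], "foo-1.2-src")    # -> 'foo-1.2'
    parse_mangles(["s/(\d)rc/$1_rc/"], "1.0rc2")   # -> '1.0_rc2' ($1 is rewritten to \1)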

@@ -33,6 +33,7 @@ VERSION_CMP_PACKAGE_QUIRKS = {
_v_end = '((-|_)(pre|p|beta|b|alpha|a|rc|r)\d*)'
_v = r'((\d+)((\.\d+)*)([a-zA-Z]*?)(' + _v_end + '*))'
# Stolen from g-pypi
def gentoo_mangle_version(up_pv):
"""Convert PV to MY_PV if needed
@@ -537,6 +538,7 @@ def generate_scan_paths(url):
    return steps


def parse_mirror(uri):
    from random import shuffle
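
Note (added for illustration, not part of the commit): the version regex _v
shown a few lines above matches upstream version strings embedded in distfile
names, e.g. (file name made up):

    import re

    _v_end = '((-|_)(pre|p|beta|b|alpha|a|rc|r)\d*)'
    _v = r'((\d+)((\.\d+)*)([a-zA-Z]*?)(' + _v_end + '*))'

    re.search(_v, "foobar-1.2.3_rc1.tar.gz").group(1)
    # -> '1.2.3_rc1'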