euscan: Added watch handler, fixed generic one

Signed-off-by: volpino <fox91@anche.no>
volpino 2012-07-24 15:02:36 +02:00
parent 266838b308
commit c35065e344
11 changed files with 213 additions and 49 deletions

View File

@@ -11,6 +11,7 @@ import logging
 import shutil
 import subprocess

+from portage.exception import AmbiguousPackageName
 from gentoolkit.query import Query
 from BeautifulSoup import BeautifulSoup, SoupStrainer
@@ -133,40 +134,60 @@ def get_deb_url(name):

 def patch_metadata(metadata_path, watch_data, diff=False):
-    watch_data = "\n".join([line for line in watch_data.split("\n")
-                            if not line.startswith("#")])  # comments
-    watch_data = watch_data.replace("\\\n", "")  # remove backslashes
-    watch_data = " ".join(watch_data.split())  # remove extra spaces and \n
-
-    result = re.match(
-        r'(version=\d+?) (?:opts=(?:"([^"]+?)"|([^\s]+?)) )?(.*)', watch_data
-    )
-    version, attrs_quote, attrs, url = result.groups()
-    attrs = attrs_quote or attrs
-
-    if attrs:
-        attrs = [x.replace('=', '="') + '"' for x in attrs.split(",")]
-        attrs = " ".join(attrs)
+    logger.info(" Patching metadata file")

     with open(metadata_path) as fp:
         original = fp.read()

     rindent, indent = guess_indent_values(original)
     data = original

-    logger.info(" Patching metadata file")
-
-    if attrs:
-        watch_tag = '%s<watch %s %s>%s</watch>' % (indent, version, attrs, url)
-    else:
-        watch_tag = '%s<watch %s>%s</watch>' % (indent, version, url)
+    # clean watch_data
+    watch_data = "\n".join([line for line in watch_data.split("\n")
+                            if not line.startswith("#")])  # comments
+    watch_data = watch_data.replace("\\\n", "")  # remove backslashes
+
+    watch_tags = []
+
+    for watch_line in watch_data.split("\n"):  # there can be multiple lines
+        watch_line = " ".join(watch_line.split())  # remove extra spaces and \n
+
+        version_parse = re.match("version=(\d+?)", watch_line)
+        if version_parse:
+            version = version_parse.group(1)
+            continue
+
+        if not watch_line:  # skip empty lines
+            continue
+
+        # parse watch_line
+        result = re.match(
+            r'(?:opts=(?:"([^"]+?)"|([^\s]+?)) )?(.*)',
+            watch_line
+        )
+        attrs_quote, attrs, url = result.groups()
+        attrs = attrs_quote or attrs
+
+        if attrs:
+            attrs = [x.replace('=', '="') + '"' for x in attrs.split(",")]
+            attrs = " ".join(attrs)
+
+        if attrs:
+            watch_tag = '%s<watch version="%s" %s>%s</watch>' % \
+                (indent, version, attrs, url)
+        else:
+            watch_tag = '%s<watch version="%s">%s</watch>' % \
+                (indent, version, url)
+        watch_tags.append(watch_tag)
+
+    watch_tags = "\n".join(watch_tags)

     if '<upstream>' in data:
-        data = data.replace('<upstream>', '<upstream>\n%s' % watch_tag, 1)
+        data = data.replace('<upstream>', '<upstream>\n%s' % watch_tags, 1)
     else:
         rep = '%s<upstream>\n%s\n%s</upstream>\n</pkgmetadata>' % \
-            (rindent, watch_tag, rindent)
+            (rindent, watch_tags, rindent)
         data = data.replace('</pkgmetadata>', rep, 1)

     if not diff:
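
Note: the reworked parser above emits one <watch> tag per non-comment line of a Debian watch file, instead of assuming a single line as before. A minimal standalone sketch of the per-line transformation (the watch data here is invented for illustration; Python 2, like the rest of the tree):

    import re

    # Hypothetical watch file: a version= header plus one URL line with opts.
    watch_data = 'version=3\n' \
        'opts="uversionmangle=s/rc/_rc/" ' \
        'http://example.org/pkg-([\\d.]+)\\.tar\\.gz'

    version = None
    for watch_line in watch_data.split("\n"):
        version_parse = re.match(r"version=(\d+)", watch_line)
        if version_parse:
            version = version_parse.group(1)
            continue
        result = re.match(r'(?:opts=(?:"([^"]+?)"|([^\s]+?)) )?(.*)', watch_line)
        attrs_quote, attrs, url = result.groups()
        attrs = attrs_quote or attrs
        if attrs:
            attrs = " ".join(x.replace('=', '="') + '"' for x in attrs.split(","))
            print '<watch version="%s" %s>%s</watch>' % (version, attrs, url)
        else:
            print '<watch version="%s">%s</watch>' % (version, url)

Run standalone, this prints <watch version="3" uversionmangle="s/rc/_rc/">http://example.org/pkg-([\d.]+)\.tar\.gz</watch>, the kind of tag the function splices under <upstream> in metadata.xml.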
@@ -183,14 +204,18 @@ def patch_metadata(metadata_path, watch_data, diff=False):

 def process_package(query, diff=False):
-    matches = Query(query).smart_find(
-        in_installed=True,
-        in_porttree=True,
-        in_overlay=True,
-        include_masked=True,
-        show_progress=False,
-        no_matches_fatal=False,
-    )
+    try:
+        matches = Query(query).smart_find(
+            in_installed=True,
+            in_porttree=True,
+            in_overlay=True,
+            include_masked=True,
+            show_progress=False,
+            no_matches_fatal=False,
+        )
+    except AmbiguousPackageName:
+        logger.error(" Ambiguous package name")
+        return None

     if len(matches) == 0:
         logger.error(" Package not found")
@@ -224,7 +249,9 @@ def main():
     for package in packages:
         logger.info("Processing %s..." % package)
-        print process_package(package, opts.diff)
+        result = process_package(package, opts.diff)
+        if result:
+            print result

 if __name__ == "__main__":
     main()

View File

@@ -250,7 +250,7 @@ def package(request, category, package):
         'upstream': upstream,
         'log': log,
         'vlog': vlog,
-        'msg' : msg,
+        'msg': msg,
         'last_scan': last_scan,
         'favourited': favourited,
         'refreshed': refreshed,

View File

@@ -7,7 +7,7 @@ from euscan import helpers, output

 HANDLER_NAME = "cpan"
 CONFIDENCE = 100.0
-PRIORITY = 100
+PRIORITY = 90

 _cpan_package_name_re = re.compile("mirror://cpan/authors/.*/([^/.]*).*")

View File

@@ -1,3 +1,4 @@
+from urlparse import urljoin
 import urllib2
 import re
 import StringIO
@@ -34,8 +35,7 @@ def scan_html(data, url, pattern):
         match = re.match(pattern, href, re.I)
         if match:
-            results.append((match.group(1), match.group(0)))
+            results.append((".".join(match.groups()), match.group(0)))

     return results
@@ -47,7 +47,7 @@ def scan_ftp(data, url, pattern):
         line = line.replace("\n", "").replace("\r", "")
         match = re.search(pattern, line, re.I)
         if match:
-            results.append((match.group(1), match.group(0)))
+            results.append((".".join(match.groups()), match.group(0)))

     return results
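
The change from match.group(1) to ".".join(match.groups()) matters when the version regex captures the version in more than one group. A quick illustration with an invented pattern and filename:

    import re

    # Version split across two capture groups, as generated patterns can be.
    pattern = r"foo-(\d+)\.(\d+)\.tar\.gz"
    href = "foo-1.2.tar.gz"

    match = re.match(pattern, href, re.I)
    if match:
        print match.group(1)            # "1"   -- drops the minor component
        print ".".join(match.groups())  # "1.2" -- whole version recovered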
@@ -77,7 +77,7 @@ def scan_directory_recursive(cp, ver, rev, url, steps, orig_url):
     results = []

-    if re.search("<\s*a\s+[^>]*href", data):
+    if re.search("<\s*a\s+[^>]*href", data, re.I):
         results.extend(scan_html(data, url, pattern))
     elif url.startswith('ftp://'):
         results.extend(scan_ftp(data, url, pattern))
@@ -88,11 +88,7 @@ def scan_directory_recursive(cp, ver, rev, url, steps, orig_url):
         pv = helpers.gentoo_mangle_version(up_pv)
         if helpers.version_filtered(cp, ver, pv):
             continue
-        if not url.endswith('/') and not path.startswith('/'):
-            path = url + '/' + path
-        else:
-            path = url + path
+        path = urljoin(url, path)

         if not steps and path not in orig_url:
             versions.append((path, pv, HANDLER_NAME, CONFIDENCE))
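
Switching to urljoin also fixes href forms the manual slash handling got wrong, namely absolute paths and fully-qualified URLs. A comparison with invented URLs:

    from urlparse import urljoin  # Python 2 stdlib, as imported above

    url = "http://example.org/pub/releases/"

    print urljoin(url, "foo-1.2.tar.gz")
    # http://example.org/pub/releases/foo-1.2.tar.gz

    print urljoin(url, "/other/foo-1.2.tar.gz")
    # http://example.org/other/foo-1.2.tar.gz
    # (the old code produced ".../releases//other/foo-1.2.tar.gz")

    print urljoin(url, "http://mirror.example.net/foo-1.2.tar.gz")
    # http://mirror.example.net/foo-1.2.tar.gz (absolute hrefs pass through)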

View File

@@ -8,7 +8,7 @@ from euscan import helpers, output

 HANDLER_NAME = "github"
 CONFIDENCE = 100.0
-PRIORITY = 100
+PRIORITY = 90


 def can_handle(pkg, url):

View File

@@ -1,6 +1,6 @@
 from euscan.handlers import generic

-PRIORITY = 100
+PRIORITY = 90

 HANDLER_NAME = "kde"
@@ -14,7 +14,7 @@ def can_handle(pkg, url):

 def clean_results(results):
     ret = []
-    for path, version, confidence in results:
+    for path, version, _, confidence in results:
         if version == '5SUMS':
             continue
         ret.append((path, version, HANDLER_NAME, confidence))

View File

@@ -7,7 +7,7 @@ from euscan import helpers, output

 HANDLER_NAME = "php"
 CONFIDENCE = 100.0
-PRIORITY = 100
+PRIORITY = 90


 def can_handle(pkg, url):

View File

@@ -7,7 +7,7 @@ from euscan import helpers, output

 HANDLER_NAME = "pypi"
 CONFIDENCE = 100.0
-PRIORITY = 100
+PRIORITY = 90


 def can_handle(pkg, url):

View File

@@ -7,7 +7,7 @@ from euscan import helpers, output

 HANDLER_NAME = "rubygem"
 CONFIDENCE = 100.0
-PRIORITY = 100
+PRIORITY = 90


 def can_handle(pkg, url):

View File

@@ -0,0 +1,139 @@
+import re
+import urllib2
+
+import portage
+
+from euscan.handlers import generic
+from euscan import output, helpers
+
+PRIORITY = 100
+
+HANDLER_NAME = "watch"
+CONFIDENCE = 100.0
+
+is_pattern = r"\([^\/]+\)"
+
+
+def can_handle(pkg, url):
+    try:
+        return pkg.metadata._xml_tree.find("upstream").find("watch") \
+            is not None
+    except AttributeError:
+        return False
+
+
+def parse_mangles(mangles, string):
+    for mangle in mangles:
+        # convert regex from perl format to python format
+        m = re.match(r"s/(.*[^\\])/(.*)/", mangle)
+        pattern, repl = m.groups()
+        repl = re.sub(r"\$(\d+)", r"\\\1", repl)
+        string = re.sub(pattern, repl, string)
+    return string
+
+
+def clean_results(results, versionmangle, urlmangle):
+    ret = []
+    for path, version, _, _ in results:
+        version = parse_mangles(versionmangle, version)
+        path = parse_mangles(urlmangle, path)
+        ret.append((path, version, HANDLER_NAME, CONFIDENCE))
+    return ret
+
+
+def parse_watch(pkg):
+    for watch_tag in pkg.metadata._xml_tree.find("upstream").findall("watch"):
+        try:
+            base, file_pattern = watch_tag.text.split(" ")[:2]
+        except ValueError:
+            base, file_pattern = watch_tag.text, None
+
+        # the file pattern can be in the base url
+        pattern_regex = r"/([^/]*\([^/]*\)[^/]*)$"
+        match = re.search(pattern_regex, base)
+        if match:
+            file_pattern = match.group(1)
+            base = base.replace(file_pattern, "")
+
+        # handle sf.net specially
+        base = base.replace(
+            "http://sf.net/", "http://qa.debian.org/watch/sf.php/"
+        )
+
+        vmangle = watch_tag.attrib.get("uversionmangle", None) or \
+            watch_tag.attrib.get("versionmangle", None)
+        versionmangle = vmangle.split(";") if vmangle else []
+
+        umangle = watch_tag.attrib.get("downloadurlmangle", None)
+        urlmangle = umangle.split(";") if umangle else []
+
+        yield (base, file_pattern, versionmangle, urlmangle)
+
+
+def handle_directory_patterns(base, file_pattern):
+    """
+    Directory pattern matching
+    e.g.: base: ftp://ftp.nessus.org/pub/nessus/nessus-([\d\.]+)/src/
+          file_pattern: nessus-core-([\d\.]+)\.tar\.gz
+    """
+    splitted = base.split("/")
+    i = 0
+    basedir = []
+    for elem in splitted:
+        if re.search(is_pattern, elem):
+            break
+        basedir.append(elem)
+        i += 1
+    basedir = "/".join(basedir)
+    directory_pattern = splitted[i]
+    final = "/".join(splitted[i + 1:])
+
+    try:
+        fp = helpers.urlopen(basedir)
+    except urllib2.URLError:
+        return []
+    except IOError:
+        return []
+
+    if not fp:
+        return []
+
+    data = fp.read()
+
+    if basedir.startswith("ftp://"):
+        scan_data = generic.scan_ftp(data, basedir, directory_pattern)
+    else:
+        scan_data = generic.scan_html(data, basedir, directory_pattern)
+
+    return [("/".join((basedir, path, final)), file_pattern)
+            for _, path in scan_data]
+
+
+def scan(pkg, url):
+    output.einfo("Using watch data")
+
+    cp, ver, rev = portage.pkgsplit(pkg.cpv)
+
+    results = []
+    for base, file_pattern, versionmangle, urlmangle in parse_watch(pkg):
+        if not re.search(is_pattern, base):
+            steps = [(base, file_pattern)]
+            res = generic.scan_directory_recursive(
+                cp, ver, rev, "", steps, url
+            )
+        else:
+            res = []
+            for step in handle_directory_patterns(base, file_pattern):
+                res += generic.scan_directory_recursive(
+                    cp, ver, rev, "", [step], url
+                )
+
+        results += clean_results(res, versionmangle, urlmangle)
+    return results
+
+
+def brute_force(pkg, url):
+    return []
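
parse_mangles above is the piece that translates Debian's Perl-style s/// rules into Python re.sub calls, rewriting $1-style backreferences to \1. A self-contained demonstration with a made-up uversionmangle rule (same conversion logic, restated here so it runs on its own):

    import re

    def parse_mangles(mangles, string):
        # s/pattern/repl/ -> re.sub(pattern, repl, string), with $1 -> \1
        for mangle in mangles:
            m = re.match(r"s/(.*[^\\])/(.*)/", mangle)
            pattern, repl = m.groups()
            repl = re.sub(r"\$(\d+)", r"\\\1", repl)
            string = re.sub(pattern, repl, string)
        return string

    # Map an upstream "1.2rc1" to Gentoo-style "1.2_rc1".
    print parse_mangles([r"s/(\d)rc/$1_rc/"], "1.2rc1")  # prints 1.2_rc1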

View File

@@ -33,6 +33,7 @@ VERSION_CMP_PACKAGE_QUIRKS = {
 _v_end = '((-|_)(pre|p|beta|b|alpha|a|rc|r)\d*)'
 _v = r'((\d+)((\.\d+)*)([a-zA-Z]*?)(' + _v_end + '*))'

+
 # Stolen from g-pypi
 def gentoo_mangle_version(up_pv):
     """Convert PV to MY_PV if needed
@@ -537,6 +538,7 @@ def generate_scan_paths(url):
     return steps

+
 def parse_mirror(uri):
     from random import shuffle