euscan: Added watch handler, fixed generic one
Signed-off-by: volpino <fox91@anche.no>
parent 266838b308
commit c35065e344
@@ -11,6 +11,7 @@ import logging
 import shutil
 import subprocess
 
+from portage.exception import AmbiguousPackageName
 from gentoolkit.query import Query
 from BeautifulSoup import BeautifulSoup, SoupStrainer
 
@@ -133,40 +134,60 @@ def get_deb_url(name):
 
 
 def patch_metadata(metadata_path, watch_data, diff=False):
-    watch_data = "\n".join([line for line in watch_data.split("\n")
-                            if not line.startswith("#")])  # comments
-    watch_data = watch_data.replace("\\\n", "")  # remove backslashes
-    watch_data = " ".join(watch_data.split())  # remove extra spaces and \n
-
-    result = re.match(
-        r'(version=\d+?) (?:opts=(?:"([^"]+?)"|([^\s]+?)) )?(.*)', watch_data
-    )
-
-    version, attrs_quote, attrs, url = result.groups()
-    attrs = attrs_quote or attrs
-
-    if attrs:
-        attrs = [x.replace('=', '="') + '"' for x in attrs.split(",")]
-        attrs = " ".join(attrs)
+    logger.info(" Patching metadata file")
 
     with open(metadata_path) as fp:
         original = fp.read()
     rindent, indent = guess_indent_values(original)
 
     data = original
 
-    logger.info(" Patching metadata file")
+    # clean watch_data
+    watch_data = "\n".join([line for line in watch_data.split("\n")
+                            if not line.startswith("#")])  # comments
+    watch_data = watch_data.replace("\\\n", "")  # remove backslashes
 
-    if attrs:
-        watch_tag = '%s<watch %s %s>%s</watch>' % (indent, version, attrs, url)
-    else:
-        watch_tag = '%s<watch %s>%s</watch>' % (indent, version, url)
+    watch_tags = []
+    for watch_line in watch_data.split("\n"):  # there can be multiple lines
+        watch_line = " ".join(watch_line.split())  # remove extra spaces and \n
+
+        version_parse = re.match("version=(\d+?)", watch_line)
+        if version_parse:
+            version = version_parse.group(1)
+            continue
+
+        if not watch_line:  # skip empty lines
+            continue
+
+        # parse watch_line
+        result = re.match(
+            r'(?:opts=(?:"([^"]+?)"|([^\s]+?)) )?(.*)',
+            watch_line
+        )
+
+        attrs_quote, attrs, url = result.groups()
+        attrs = attrs_quote or attrs
+
+        if attrs:
+            attrs = [x.replace('=', '="') + '"' for x in attrs.split(",")]
+            attrs = " ".join(attrs)
+
+        if attrs:
+            watch_tag = '%s<watch version="%s" %s>%s</watch>' % \
+                (indent, version, attrs, url)
+        else:
+            watch_tag = '%s<watch version="%s">%s</watch>' % \
+                (indent, version, url)
+        watch_tags.append(watch_tag)
+
+    watch_tags = "\n".join(watch_tags)
 
     if '<upstream>' in data:
-        data = data.replace('<upstream>', '<upstream>\n%s' % watch_tag, 1)
+        data = data.replace('<upstream>', '<upstream>\n%s' % watch_tags, 1)
     else:
         rep = '%s<upstream>\n%s\n%s</upstream>\n</pkgmetadata>' % \
-            (rindent, watch_tag, rindent)
+            (rindent, watch_tags, rindent)
         data = data.replace('</pkgmetadata>', rep, 1)
 
     if not diff:
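
Note (illustration, not part of the commit): with the per-line loop above, a
multi-line debian watch file now yields one <watch> tag per entry instead of a
single collapsed tag. A minimal sketch with an invented watch file (package
names and URLs are made up; whitespace is normalized by the loop):

    watch_data = """version=3
    opts=uversionmangle=s/_/./ http://example.org/foo/ foo-([\d.]+)\.tar\.gz
    http://example.org/bar/ bar-([\d.]+)\.tar\.gz"""

    # patch_metadata() would now emit, roughly:
    # <watch version="3" uversionmangle="s/_/./">http://example.org/foo/ foo-([\d.]+)\.tar\.gz</watch>
    # <watch version="3">http://example.org/bar/ bar-([\d.]+)\.tar\.gz</watch>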
@@ -183,14 +204,18 @@ def patch_metadata(metadata_path, watch_data, diff=False):
 
 
 def process_package(query, diff=False):
-    matches = Query(query).smart_find(
-        in_installed=True,
-        in_porttree=True,
-        in_overlay=True,
-        include_masked=True,
-        show_progress=False,
-        no_matches_fatal=False,
-    )
+    try:
+        matches = Query(query).smart_find(
+            in_installed=True,
+            in_porttree=True,
+            in_overlay=True,
+            include_masked=True,
+            show_progress=False,
+            no_matches_fatal=False,
+        )
+    except AmbiguousPackageName:
+        logger.error(" Ambiguous package name")
+        return None
 
     if len(matches) == 0:
         logger.error(" Package not found")
@@ -224,7 +249,9 @@ def main():
 
     for package in packages:
        logger.info("Processing %s..." % package)
-        print process_package(package, opts.diff)
+        result = process_package(package, opts.diff)
+        if result:
+            print result
 
 if __name__ == "__main__":
     main()
@@ -250,7 +250,7 @@ def package(request, category, package):
         'upstream': upstream,
         'log': log,
         'vlog': vlog,
-        'msg' : msg,
+        'msg': msg,
         'last_scan': last_scan,
         'favourited': favourited,
         'refreshed': refreshed,
@@ -7,7 +7,7 @@ from euscan import helpers, output
 
 HANDLER_NAME = "cpan"
 CONFIDENCE = 100.0
-PRIORITY = 100
+PRIORITY = 90
 
 _cpan_package_name_re = re.compile("mirror://cpan/authors/.*/([^/.]*).*")
 
@@ -1,3 +1,4 @@
+from urlparse import urljoin
 import urllib2
 import re
 import StringIO
@@ -34,8 +35,7 @@ def scan_html(data, url, pattern):
 
         match = re.match(pattern, href, re.I)
         if match:
-            results.append((match.group(1), match.group(0)))
+            results.append((".".join(match.groups()), match.group(0)))
 
     return results
 
-
@@ -47,7 +47,7 @@ def scan_ftp(data, url, pattern):
         line = line.replace("\n", "").replace("\r", "")
         match = re.search(pattern, line, re.I)
         if match:
-            results.append((match.group(1), match.group(0)))
+            results.append((".".join(match.groups()), match.group(0)))
 
     return results
 
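
Note (illustration, not part of the commit): the old code assumed the version
pattern had exactly one capture group; the patterns generic builds can split a
version across several groups, which ".".join(match.groups()) reassembles. A
minimal sketch with an invented pattern:

    >>> import re
    >>> m = re.match(r"foo-(\d+)\.(\d+)\.tar\.gz", "foo-1.2.tar.gz")
    >>> ".".join(m.groups())
    '1.2'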
@@ -77,7 +77,7 @@ def scan_directory_recursive(cp, ver, rev, url, steps, orig_url):
 
     results = []
 
-    if re.search("<\s*a\s+[^>]*href", data):
+    if re.search("<\s*a\s+[^>]*href", data, re.I):
         results.extend(scan_html(data, url, pattern))
     elif url.startswith('ftp://'):
         results.extend(scan_ftp(data, url, pattern))
@@ -88,11 +88,7 @@ def scan_directory_recursive(cp, ver, rev, url, steps, orig_url):
         pv = helpers.gentoo_mangle_version(up_pv)
         if helpers.version_filtered(cp, ver, pv):
             continue
-
-        if not url.endswith('/') and not path.startswith('/'):
-            path = url + '/' + path
-        else:
-            path = url + path
+        path = urljoin(url, path)
 
         if not steps and path not in orig_url:
             versions.append((path, pv, HANDLER_NAME, CONFIDENCE))
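
Note (illustration, not part of the commit): urljoin handles the cases the old
string concatenation got wrong, such as absolute paths in hrefs:

    >>> from urlparse import urljoin  # Python 2, as used by this codebase
    >>> urljoin("http://example.org/pub/", "foo-1.0.tar.gz")
    'http://example.org/pub/foo-1.0.tar.gz'
    >>> urljoin("http://example.org/pub/", "/dist/foo-1.0.tar.gz")
    'http://example.org/dist/foo-1.0.tar.gz'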
@@ -8,7 +8,7 @@ from euscan import helpers, output
 
 HANDLER_NAME = "github"
 CONFIDENCE = 100.0
-PRIORITY = 100
+PRIORITY = 90
 
 
 def can_handle(pkg, url):
@@ -1,6 +1,6 @@
 from euscan.handlers import generic
 
-PRIORITY = 100
+PRIORITY = 90
 
 HANDLER_NAME = "kde"
 
@@ -14,7 +14,7 @@ def can_handle(pkg, url):
 def clean_results(results):
     ret = []
 
-    for path, version, confidence in results:
+    for path, version, _, confidence in results:
         if version == '5SUMS':
             continue
         ret.append((path, version, HANDLER_NAME, confidence))
@@ -7,7 +7,7 @@ from euscan import helpers, output
 
 HANDLER_NAME = "php"
 CONFIDENCE = 100.0
-PRIORITY = 100
+PRIORITY = 90
 
 
 def can_handle(pkg, url):
@@ -7,7 +7,7 @@ from euscan import helpers, output
 
 HANDLER_NAME = "pypi"
 CONFIDENCE = 100.0
-PRIORITY = 100
+PRIORITY = 90
 
 
 def can_handle(pkg, url):
@@ -7,7 +7,7 @@ from euscan import helpers, output
 
 HANDLER_NAME = "rubygem"
 CONFIDENCE = 100.0
-PRIORITY = 100
+PRIORITY = 90
 
 
 def can_handle(pkg, url):
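
Note (illustration, not part of the commit): the site handlers above all drop
from PRIORITY = 100 to 90 so that the new watch handler (PRIORITY = 100, next
file) is preferred whenever a package's metadata.xml carries debian watch
data. A minimal sketch of the selection this implies, assuming euscan tries
handlers in descending priority order (all_handlers is hypothetical):

    handlers = sorted(all_handlers, key=lambda h: h.PRIORITY, reverse=True)
    handler = next(h for h in handlers if h.can_handle(pkg, url))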
pym/euscan/handlers/watch.py (new file, 139 lines)
@@ -0,0 +1,139 @@
+import re
+import urllib2
+
+import portage
+
+from euscan.handlers import generic
+from euscan import output, helpers
+
+PRIORITY = 100
+
+HANDLER_NAME = "watch"
+CONFIDENCE = 100.0
+
+
+is_pattern = r"\([^\/]+\)"
+
+
+def can_handle(pkg, url):
+    try:
+        return pkg.metadata._xml_tree.find("upstream").find("watch") \
+            is not None
+    except AttributeError:
+        return False
+
+
+def parse_mangles(mangles, string):
+    for mangle in mangles:
+        # convert regex from perl format to python format
+        m = re.match(r"s/(.*[^\\])/(.*)/", mangle)
+        pattern, repl = m.groups()
+        repl = re.sub(r"\$(\d+)", r"\\\1", repl)
+        string = re.sub(pattern, repl, string)
+    return string
+
+
+def clean_results(results, versionmangle, urlmangle):
+    ret = []
+
+    for path, version, _, _ in results:
+        version = parse_mangles(versionmangle, version)
+        path = parse_mangles(urlmangle, path)
+        ret.append((path, version, HANDLER_NAME, CONFIDENCE))
+
+    return ret
+
+
+def parse_watch(pkg):
+    for watch_tag in pkg.metadata._xml_tree.find("upstream").findall("watch"):
+        try:
+            base, file_pattern = watch_tag.text.split(" ")[:2]
+        except ValueError:
+            base, file_pattern = watch_tag.text, None
+
+        # the file pattern can be in the base url
+        pattern_regex = r"/([^/]*\([^/]*\)[^/]*)$"
+        match = re.search(pattern_regex, base)
+        if match:
+            file_pattern = match.group(1)
+            base = base.replace(file_pattern, "")
+
+        # handle sf.net specially
+        base = base.replace(
+            "http://sf.net/", "http://qa.debian.org/watch/sf.php/"
+        )
+
+        vmangle = watch_tag.attrib.get("uversionmangle", None) or \
+            watch_tag.attrib.get("versionmangle", None)
+        versionmangle = vmangle.split(";") if vmangle else []
+
+        umangle = watch_tag.attrib.get("downloadurlmangle", None)
+        urlmangle = umangle.split(";") if umangle else []
+
+        yield (base, file_pattern, versionmangle, urlmangle)
+
+
+def handle_directory_patterns(base, file_pattern):
+    """
+    Directory pattern matching
+    e.g.: base: ftp://ftp.nessus.org/pub/nessus/nessus-([\d\.]+)/src/
+          file_pattern: nessus-core-([\d\.]+)\.tar\.gz
+    """
+    splitted = base.split("/")
+    i = 0
+    basedir = []
+    for elem in splitted:
+        if re.search(is_pattern, elem):
+            break
+        basedir.append(elem)
+        i += 1
+    basedir = "/".join(basedir)
+    directory_pattern = splitted[i]
+    final = "/".join(splitted[i + 1:])
+
+    try:
+        fp = helpers.urlopen(basedir)
+    except urllib2.URLError:
+        return []
+    except IOError:
+        return []
+
+    if not fp:
+        return []
+
+    data = fp.read()
+
+    if basedir.startswith("ftp://"):
+        scan_data = generic.scan_ftp(data, basedir, directory_pattern)
+    else:
+        scan_data = generic.scan_html(data, basedir, directory_pattern)
+
+    return [("/".join((basedir, path, final)), file_pattern)
+            for _, path in scan_data]
+
+
+def scan(pkg, url):
+    output.einfo("Using watch data")
+
+    cp, ver, rev = portage.pkgsplit(pkg.cpv)
+
+    results = []
+    for base, file_pattern, versionmangle, urlmangle in parse_watch(pkg):
+        if not re.search(is_pattern, base):
+            steps = [(base, file_pattern)]
+            res = generic.scan_directory_recursive(
+                cp, ver, rev, "", steps, url
+            )
+        else:
+            res = []
+            for step in handle_directory_patterns(base, file_pattern):
+                res += generic.scan_directory_recursive(
+                    cp, ver, rev, "", [step], url
+                )
+
+        results += clean_results(res, versionmangle, urlmangle)
+    return results
+
+
+def brute_force(pkg, url):
+    return []
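
Note (illustration, not part of the commit): parse_mangles() converts
debian-style s/.../.../ substitutions into re.sub() calls, rewriting $1-style
backreferences to \1. A made-up example:

    >>> from euscan.handlers import watch
    >>> watch.parse_mangles([r"s/-beta(\d+)/_beta$1/"], "1.0-beta3")
    '1.0_beta3'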
@@ -33,6 +33,7 @@ VERSION_CMP_PACKAGE_QUIRKS = {
 _v_end = '((-|_)(pre|p|beta|b|alpha|a|rc|r)\d*)'
 _v = r'((\d+)((\.\d+)*)([a-zA-Z]*?)(' + _v_end + '*))'
 
+
 # Stolen from g-pypi
 def gentoo_mangle_version(up_pv):
     """Convert PV to MY_PV if needed
@@ -537,6 +538,7 @@ def generate_scan_paths(url):
 
     return steps
 
+
 def parse_mirror(uri):
     from random import shuffle
 