euscan-ng/euscan

546 lines
13 KiB
Plaintext
Raw Normal View History

#!/usr/bin/python
##############################################################################
# $Header: $
##############################################################################
# Distributed under the terms of the GNU General Public License, v2 or later
# Author: Corentin Chary <corentin.chary@gmail.com>
# Gentoo new upstream release scan tool.
import os
import sys
import re
import StringIO
from stat import *
from xml.sax import saxutils, make_parser, handler
from xml.sax.handler import feature_namespaces
import urllib
import urllib2
import pkg_resources
import portage
from portage.output import *
from portage.dbapi.porttree import _parse_uri_map
from portage.exception import InvalidDependString
# Version string printed by usage().
__version__ = "svn"

# Runtime switches read by the scanning functions in this file.
settings = {
    # Depth handed to gen_versions() by brute_force().
    "brute-force-level": 2,
    # Enables brute_force().
    "brute-force": True,
    # When a guessed URL exists, generate further candidates from it.
    "brute-force-crazy": True,
    # Enables scan_directory().
    "scan-dir": True,
    "format": "pretty",
    # Drives output.quiet below.
    "verbose": True,
    # Stop at the first hit instead of collecting every candidate.
    "stop-when-found": False,
    # Keep going through every SRC_URI file even after a hit.
    "check-all-files": False,
}

# Shared portage output helper used for progress/info/warning lines.
output = EOutput()
output.quiet = not settings["verbose"]
def cast_int_components(version):
    """Convert, in place, every element of *version* that ``int()`` accepts.

    Elements for which ``int()`` raises ``ValueError`` are left as they are.
    The same (mutated) sequence object is returned.
    """
    for position, raw in enumerate(version):
        try:
            converted = int(raw)
        except ValueError:
            continue
        version[position] = converted
    return version
def parse_version(version):
    """Return the object ``pkg_resources.parse_version`` builds for *version*.

    Callers in this file only compare the results with ``<=`` / ``>=``.
    """
    return pkg_resources.parse_version(version)
def template_from_url(url, version):
    """Replace occurrences of *version* in *url* with template placeholders.

    The part after ``://`` is processed one ``/``-separated segment at a time:

    * a segment containing the full version gets it replaced by ``${PV}``;
    * a segment that is a prefix of the version (a directory named after
      part of it) has its leading components that match the version's
      components replaced by ``${0}``, ``${1}``, ...;
    * any other segment is kept unchanged.
    """
    scheme, remainder = url.split('://')
    segments = remainder.split('/')

    for pos, segment in enumerate(segments):
        if not segment:
            continue

        # If it's the full version, it's easy
        if version in segment:
            segments[pos] = segment.replace(version, '${PV}')
            continue

        if not version.startswith(segment):
            continue

        # For directories made from a part of the version
        full = split_version(version)
        part = split_version(segment)
        for idx, (have, want) in enumerate(zip(part, full)):
            if have != want:
                break
            part[idx] = '${%d}' % idx

        rebuilt = join_version(part)
        # join_version() puts no dot between string components; restore it
        # between adjacent placeholders.
        segments[pos] = rebuilt.replace('}$', '}.$')

    return scheme + "://" + "/".join(segments)
def url_from_template(url, version):
    """Expand the placeholders of a template URL for a concrete *version*.

    ``${PV}`` becomes the whole version string and ``${N}`` becomes the
    N-th component returned by ``split_version(version)``.
    """
    expanded = url.replace('${PV}', version)
    for index, component in enumerate(split_version(version)):
        expanded = expanded.replace('${%d}' % index, str(component))
    return expanded
# Stolen from distutils.LooseVersion
# Used for brute force to increment the version
def split_version(version):
    """Split a version string into a list of components.

    The string is cut on runs of digits, runs of lowercase letters and dots.
    Empty pieces and the dots themselves are dropped; pieces accepted by
    ``int()`` are returned as integers, everything else stays a string.
    """
    tokenizer = re.compile(r'(\d+ | [a-z]+ | \.)', re.VERBOSE)

    components = []
    for token in tokenizer.split(version):
        if not token or token == '.':
            continue
        try:
            token = int(token)
        except ValueError:
            pass
        components.append(token)
    return components
def join_version(components):
    """Build a version string from a component list.

    Every component is rendered with ``str()``. A dot is inserted between
    two neighbours only when neither of them is exactly of type ``str``.
    """
    pieces = []
    last = len(components) - 1
    for pos, item in enumerate(components):
        pieces.append(str(item))
        if pos == last:
            continue
        # Exact type test on purpose: only plain str suppresses the dot.
        if type(item) is not str and type(components[pos + 1]) is not str:
            pieces.append(".")
    return "".join(pieces)
def increment_version(components, level):
    """Bump the component at index *level*, in place.

    Every ``int`` component after *level* is reset to 0, then the component
    at *level* is incremented if it is an ``int``. Non-integer components
    are never modified.

    Returns the same (mutated) list.
    Raises ``Exception`` when *level* is not a valid index of *components*.
    """
    size = len(components)
    if not 0 <= level <= size - 1:
        raise Exception

    for idx in range(level + 1, size):
        if type(components[idx]) is int:
            components[idx] = 0

    if type(components[level]) is int:
        components[level] += 1

    return components
def gen_versions(components, level):
    """Generate candidate versions by repeatedly bumping trailing components.

    Starting from the last component and moving left over at most *level*
    positions, the current position is bumped with ``increment_version`` and
    a snapshot of the list is recorded, *level* times per position.

    *components* is mutated in place while generating. Returns a list of
    component lists (copies); an empty list when *components* is empty.
    """
    count = len(components)
    if not count:
        return []

    per_position = level
    positions = min(level, count)

    generated = []
    for index in range(count - 1, count - positions - 1, -1):
        increment_version(components, index)
        for _step in range(per_position):
            generated.append(list(components))
            increment_version(components, index)
    return generated
def tryurl(fileurl):
    """Return True when *fileurl* can be opened and looks like a real file.

    The URL is opened with ``urllib2.urlopen`` (third positional argument 5)
    and the response headers are inspected. The URL is rejected when:

    * a ``Content-disposition`` header is present and does not contain the
      URL's basename;
    * ``Content-Length`` is ``'0'``;
    * ``Content-Type`` contains ``text/html`` (typically an error page).

    Any exception raised while opening or inspecting the response, including
    a missing ``Content-Type`` header, counts as a failure. Progress is
    reported through ``output.ebegin`` / ``output.eend``.
    """
    # Imported here because the module header has no explicit errno import.
    import errno

    result = False

    output.ebegin("Trying: " + fileurl)

    try:
        fp = urllib2.urlopen(fileurl, None, 5)
        try:
            headers = fp.info()
        finally:
            # Only the headers are needed; do not leak the connection.
            fp.close()

        basename = os.path.basename(fileurl)

        if ('Content-disposition' in headers
                and basename not in headers['Content-disposition']):
            result = False
        elif 'Content-Length' in headers and headers['Content-Length'] == '0':
            result = False
        elif 'text/html' in headers['Content-Type']:
            result = False
        else:
            result = True
    except Exception:
        # Best effort: any network or header problem means "not found".
        # KeyboardInterrupt/SystemExit deliberately propagate.
        result = False

    output.eend(errno.ENOENT if not result else 0)

    return result
def regex_from_template(template):
    """Turn one template path segment into a regular expression string.

    The segment is escaped with ``re.escape``; the ``${...}`` placeholders
    are then un-escaped again. A run of ``${N}`` placeholders (optionally
    dot-separated) and each ``${PV}`` are replaced by a non-greedy capture
    group matching word characters, dots and dashes. The pattern is
    terminated with an optional trailing slash and ``$``.
    """
    version_group = r'([\w\.\-]+?)'

    template = re.escape(template)
    # Undo the escaping of the placeholder syntax itself.
    template = template.replace(r'\$\{', '${')
    template = template.replace(r'\}', '}')
    template = template.replace(r'}\.$', '}.$')
    # A callable replacement is inserted verbatim; a replacement *string*
    # would have its backslash escapes (e.g. ``\w``) interpreted by re.sub.
    template = re.sub(r'(\$\{\d+\}\.?)+', lambda match: version_group,
                      template)
    template = template.replace('${PV}', version_group)
    return template + r'/?$'
def basedir_from_template(template):
    """Return the part of *template* before the first templated segment.

    That is everything up to (not including) the last ``/`` preceding the
    first ``${``. A template without placeholders is returned unchanged; an
    empty string is returned when no ``/`` precedes the first placeholder.
    """
    marker = template.find('${')
    if marker == -1:
        return template

    slash = template.rfind('/', 0, marker)
    return "" if slash == -1 else template[:slash]
def generate_scan_paths(url):
    """Split a template URL into ``(path, regex)`` scanning steps.

    Plain segments are accumulated into a path. Each segment containing a
    ``${`` placeholder closes the current step: the path gathered so far is
    paired with ``regex_from_template(segment)`` and accumulation restarts
    from an empty path. Segments after the last placeholder are not part of
    any step.
    """
    scheme, remainder = url.split('://')

    steps = []
    pending = scheme + ":/"
    for segment in remainder.split('/'):
        if '${' not in segment:
            pending = pending + "/" + segment
            continue
        steps.append((pending, regex_from_template(segment)))
        pending = ""
    return steps
def scan_directory_recursive(url, steps, vmin, vmax):
    """Walk remote directory listings following *steps*, collecting versions.

    *steps* is a list of ``(path, regex)`` pairs as produced by
    ``generate_scan_paths``. The first step's path is appended to *url*,
    the resulting URL is fetched, and the listing is matched against the
    step's regex:

    * if the data contains an ``<a ... href`` tag, every link's ``href`` is
      matched (after stripping a leading copy of the URL);
    * otherwise, for ``ftp://`` URLs, every line is searched.

    Matches whose parsed version is ``<= vmin`` or ``>= vmax`` are skipped
    (a falsy bound disables that check). For each remaining match the
    function records ``(full_url, version)`` and recurses with the remaining
    steps.

    Returns a list of ``(url, version)`` tuples; an empty list when there
    are no steps or the URL cannot be opened.
    """
    if not steps:
        return []

    url += steps[0][0]
    pattern = steps[0][1]
    steps = steps[1:]

    output.einfo("Scanning: %s" % url)

    try:
        fp = urllib2.urlopen(url, None, 5)
    except Exception:
        # Unreachable listing: nothing to report for this branch.
        return []

    try:
        data = fp.read()
    finally:
        # Release the connection as soon as the listing has been read.
        fp.close()

    results = []

    if re.search(r"<\s*a\s+[^>]*href", data):
        from BeautifulSoup import BeautifulSoup

        soup = BeautifulSoup(data)

        for link in soup.findAll('a'):
            href = link.get("href")
            if not href:
                continue
            # Absolute links pointing below this URL are made relative.
            if href.startswith(url):
                href = href.replace(url, "", 1)

            match = re.match(pattern, href)
            if match:
                results.append((match.group(1), match.group(0)))

    elif url.startswith('ftp://'):  # Probably a FTP Server
        buf = StringIO.StringIO(data)
        for line in buf.readlines():
            line = line.replace("\n", "").replace("\r", "")
            match = re.search(pattern, line)
            if match:
                results.append((match.group(1), match.group(0)))

    versions = []

    for version, path in results:
        ver = parse_version(version)
        if vmin and ver <= vmin:
            continue
        if vmax and ver >= vmax:
            continue

        if not url.endswith('/') and not path.startswith('/'):
            path = url + '/' + path
        else:
            path = url + path

        versions.append((path, version))
        if steps:
            ret = scan_directory_recursive(path, steps, vmin, vmax)
            versions.extend(ret)

    return versions
def scan_directory(cpv, fileurl, limit=None):
    """Look for newer versions of *cpv* by listing the directories of *fileurl*.

    The URL is turned into a template with ``template_from_url``; if it does
    not depend on the package version a warning is emitted and nothing is
    scanned. Otherwise the template is split into scanning steps and walked
    with ``scan_directory_recursive``, using the current version as the
    lower bound and *limit* as the upper bound.

    Returns a list of ``(url, version)`` tuples; an empty list when the
    ``scan-dir`` setting is off or the URL is not version dependent.
    """
    # TODO (from the original notes): Ftp: list dir
    # TODO (from the original notes): Handle mirrors
    if not settings["scan-dir"]:
        return []

    _catpkg, ver, _rev = portage.pkgsplit(cpv)
    template = template_from_url(fileurl, ver)

    if '${' in template:
        output.einfo("Scanning: %s" % template)
        vmin = parse_version(ver)
        steps = generate_scan_paths(template)
        return scan_directory_recursive("", steps, vmin, limit)

    output.ewarn("Url doesn't seems to depend on version: %s not found in %s"
                 % (ver, fileurl))
    return []
def brute_force(cpv, fileurl, limit=None):
    """Guess newer versions of *cpv* and probe the corresponding URLs.

    Candidate versions are generated from the current version with
    ``gen_versions``; each one is substituted into the URL template and
    probed with ``tryurl``. Candidates whose parsed version is ``>= limit``
    are skipped when *limit* is set. With the ``brute-force-crazy`` setting,
    every hit seeds further candidates; with ``stop-when-found`` the search
    ends at the first hit.

    Returns a list of ``[url, version_string]`` pairs; an empty list when
    the ``brute-force`` setting is off, no candidate can be generated or the
    URL does not depend on the version.
    """
    if not settings["brute-force"]:
        return []

    catpkg, ver, rev = portage.pkgsplit(cpv)

    components = split_version(ver)
    versions = gen_versions(components, settings["brute-force-level"])

    output.einfo("Generating version from " + ver)

    if not versions:
        output.ewarn("Can't generate new versions from " + ver)
        return []

    template = template_from_url(fileurl, ver)

    if '${' not in template:
        # Same argument order as the identical message in scan_directory().
        output.ewarn("Url doesn't seems to depend on version: %s not found in %s"
                     % (ver, fileurl))
        return []
    else:
        output.einfo("Brute forcing: %s" % template)

    result = []

    # Component lists are unhashable, so membership is tracked with tuples.
    done = set()
    queued = set(tuple(v) for v in versions)

    i = 0
    while i < len(versions):
        components = versions[i]
        i += 1

        key = tuple(components)
        if key in done:
            continue
        done.add(key)

        vstring = join_version(components)
        version = parse_version(vstring)

        if limit and version >= limit:
            continue

        url = url_from_template(template, vstring)

        if not tryurl(url):
            continue

        result.append([url, vstring])

        if settings["brute-force-crazy"]:
            # gen_versions() mutates its argument: work on a copy so the
            # entry already stored in `versions` stays intact.
            seeds = gen_versions(list(components),
                                 settings["brute-force-level"])
            for v in seeds:
                candidate = tuple(v)
                if candidate not in queued:
                    queued.add(candidate)
                    versions.append(v)

        if settings["stop-when-found"]:
            break

    return result
def euscan(cpv, portdir):
    """Scan upstream for versions of *cpv* newer than the packaged one.

    The package's ``SRC_URI`` is read from the portage tree (*portdir* when
    given, the default tree otherwise). Every non-``mirror://`` URL is first
    scanned with ``scan_directory`` and then brute forced with
    ``brute_force``, honouring the ``stop-when-found`` and
    ``check-all-files`` settings. One line per distinct new version is
    printed; when a version was found at several URLs the longest one is
    shown.

    Returns the raw list of ``(url, version)`` results, or an empty list
    when ``SRC_URI`` cannot be parsed.
    """
    if portdir:
        portdb = portage.portdbapi(portdir)
    else:
        portdb = portage.portdbapi()

    src_uri, _repo = portdb.aux_get(cpv, ['SRC_URI', 'repository'])

    metadata = {
        "EAPI": portage.settings["EAPI"],
        "SRC_URI": src_uri,
    }
    use = frozenset(portage.settings["PORTAGE_USE"].split())

    try:
        alist = _parse_uri_map(cpv, metadata, use=use)
        aalist = _parse_uri_map(cpv, metadata)
    except InvalidDependString as e:
        # red() only colours the text, so it has to be written out explicitly.
        sys.stderr.write(red("!!! %s\n" % str(e)))
        sys.stderr.write(red("!!! Invalid SRC_URI for '%s'.\n" % cpv))
        return []

    if "mirror" in portage.settings.features:
        fetchme = aalist
    else:
        fetchme = alist

    versions = []

    for filename in fetchme:
        for fileurl in fetchme[filename]:
            if fileurl.startswith('mirror://'):
                output.eerror('mirror:// scheme not supported (%s)' % fileurl)
                continue

            # Try list dir
            versions.extend(scan_directory(cpv, fileurl))

            if versions and settings['stop-when-found']:
                break

            # Try manual bump
            versions.extend(brute_force(cpv, fileurl))

            if versions and settings['stop-when-found']:
                break

        if versions and not settings["check-all-files"]:
            break

    # Collapse duplicates: keep, per version, the longest URL seen.
    newversions = {}

    for url, version in versions:
        if version in newversions and len(url) < len(newversions[version]):
            continue
        newversions[version] = url

    for version in newversions:
        print(darkgreen("New Upstream Version: ") + green("%s" % version)
              + " %s" % newversions[version])

    return versions
class Metadata_XML(handler.ContentHandler):
    """SAX handler collecting herds, maintainer e-mails and long description.

    After parsing, the results are available as:

    * ``_herd``: list of text chunks found inside ``<herd>``;
    * ``_maintainers``: list of text chunks found inside ``<email>`` nested
      in ``<maintainer>``;
    * ``_longdescription``: text of the last ``<longdescription>`` element.
    """

    # Tags whose open/closed state is tracked, mapped to the flag attribute.
    _TRACKED_TAGS = {
        "herd": "_inside_herd",
        "longdescription": "_inside_longdescription",
        "maintainer": "_inside_maintainer",
        "email": "_inside_email",
    }

    def __init__(self):
        # Explicit base call instead of super(): works with old-style classes.
        handler.ContentHandler.__init__(self)
        self._inside_herd = False
        self._inside_maintainer = False
        self._inside_email = False
        self._inside_longdescription = False
        # Per-instance containers: class-level lists would be shared by
        # every handler and accumulate results across parsed files.
        self._herd = []
        self._maintainers = []
        self._longdescription = ""

    def _set_flag(self, tag, value):
        """Set the "inside element" flag for *tag* if it is a tracked tag."""
        attr = self._TRACKED_TAGS.get(tag)
        if attr is not None:
            setattr(self, attr, value)

    def startElement(self, tag, attr):
        """Record that a tracked element has been opened."""
        if tag == "longdescription":
            # A new element starts a fresh description.
            self._longdescription = ""
        self._set_flag(tag, True)

    def endElement(self, tag):
        """Record that a tracked element has been closed."""
        self._set_flag(tag, False)

    def characters(self, contents):
        """Store character data according to the currently open elements."""
        if self._inside_herd:
            self._herd.append(contents)

        if self._inside_longdescription:
            # Parsers may deliver one text node in several chunks, so the
            # pieces are concatenated rather than overwritten.
            self._longdescription += contents

        if self._inside_maintainer and self._inside_email:
            self._maintainers.append(contents)
def check_metadata(cpv, portdir = None):
    """Print herd, maintainers, description and location of a package.

    The package's ``metadata.xml`` is looked up below *portdir* (or below
    the repository path reported by portage when *portdir* is not given)
    and parsed with ``Metadata_XML``.

    Returns 1 when ``metadata.xml`` is missing or lists no herd; returns
    None otherwise.
    """
    if not portdir:
        portdb = portage.portdbapi()
        repo, = portdb.aux_get(cpv, ['repository'])
        portdir = portdb.getRepositoryPath(repo)

    package_dir = portdir + "/" + portage.pkgsplit(cpv)[0]
    metadata_file = package_dir + "/metadata.xml"

    if not os.path.exists(metadata_file):
        print(darkgreen("Maintainer: ") + red("Error (Missing metadata.xml)"))
        return 1

    parser = make_parser()
    meta = Metadata_XML()
    # Start from an empty maintainer list for this package.
    meta._maintainers = []
    parser.setContentHandler(meta)
    parser.parse(metadata_file)

    if not meta._herd:
        print(darkgreen("Herd: ") + red("Error (No Herd)"))
        return 1
    print(darkgreen("Herd: ") + ", ".join(meta._herd))

    maintainers = ", ".join(meta._maintainers) if meta._maintainers else "none"
    print(darkgreen("Maintainer: ") + maintainers)

    if len(meta._longdescription) > 1:
        print(darkgreen("Description: ") + meta._longdescription)

    print(darkgreen("Location: ") + os.path.normpath(package_dir))
def usage(code):
    """Print the usage information for this script and exit with *code*."""
    banner = "%s %s" % (green("euscan"), "(%s)" % __version__)
    print(banner)
    print("")
    print("Usage: euscan [ebuild|[package-cat/]package[-version]]")
    sys.exit(code)
# Default color setup: no colors when stdout is not a terminal, or when
# portage's NOCOLOR setting asks for it.
if not sys.stdout.isatty():
    nocolor()
elif portage.settings["NOCOLOR"] in ("yes", "true"):
    nocolor()
def fc(x,y):
    """Comparison function ordering sequences by first element, descending.

    Returns a positive number when ``y[0] > x[0]``, a negative number when
    ``y[0] < x[0]`` and 0 otherwise.
    """
    # Written with plain comparisons so it does not rely on the cmp() builtin.
    return (y[0] > x[0]) - (y[0] < x[0])
def main ():
    """Run euscan for every package or ebuild given on the command line.

    An argument ending in ``.ebuild`` is treated as a path of the form
    ``<portdir>/<category>/<package>/<package>-<version>.ebuild``; the tree
    root and the ``category/package-version`` atom are derived from it. Any
    other argument is resolved to matching packages through
    ``portage.portdb.xmatch("match-all", ...)``.
    """
    if len(sys.argv) < 2:
        usage(1)

    for pkg in sys.argv[1:]:
        if pkg.endswith('.ebuild'):
            # Absolute path so the three parent directories always exist.
            ebuild = os.path.abspath(pkg)
            package_dir = os.path.dirname(ebuild)
            category_dir = os.path.dirname(package_dir)
            portdir = os.path.dirname(category_dir)

            pf = os.path.basename(ebuild)[:-len('.ebuild')]
            # A list, so the loop below sees one cpv, not single characters.
            package_list = ["%s/%s" % (os.path.basename(category_dir), pf)]
        else:
            portdir = None
            print(pkg)
            package_list = portage.portdb.xmatch("match-all", pkg)

        for cpv in package_list:
            print(darkgreen("Package: ") + cpv)
            euscan(cpv, portdir)
            print("")


if __name__ == '__main__':
    main()