#!/usr/bin/python

##############################################################################
# $Header: $
##############################################################################
# Distributed under the terms of the GNU General Public License, v2 or later
# Author: Corentin Chary <corentin.chary@gmail.com>

# Gentoo new upstream release scan tool.

import os
import sys
import re
import errno  # used by tryurl() when reporting failures
import StringIO
from stat import *
from xml.sax import saxutils, make_parser, handler
from xml.sax.handler import feature_namespaces

import urllib
import urllib2

import pkg_resources

import portage
from portage.output import *
from portage.dbapi.porttree import _parse_uri_map
from portage.exception import InvalidDependString

__version__ = "svn"

settings = {
    "brute-force-level" : 2,
    "brute-force" : True,
    "brute-force-crazy" : True,
    "scan-dir" : True,
    "format" : "pretty",
    "verbose" : True,
    "stop-when-found" : False,
    "check-all-files" : False,
}

output = EOutput()
output.quiet = not settings['verbose']

def cast_int_components(version):
    """Cast each numeric component of a version list to int, leaving the
    non-numeric ones as strings."""
    for i, obj in enumerate(version):
        try:
            version[i] = int(obj)
        except ValueError:
            pass
    return version


def parse_version(version):
    """Parse a version string into a comparable object."""
    version = pkg_resources.parse_version(version)
    #version = list(version)
    #return cast_int_components(version)
    return version
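
# A quick illustration (comments only, not executed): pkg_resources gives
# properly ordered version objects, e.g.
#   parse_version("1.2.3") < parse_version("1.2.10")  -> True
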
def template_from_url(url, version):
    """Replace occurrences of the version (or of its leading components)
    in url with ${PV} / ${N} placeholders."""
    prefix, chunks = url.split('://')
    chunks = chunks.split('/')

    for i in range(len(chunks)):
        chunk = chunks[i]

        if not chunk:
            continue

        # If it's the full version, it's easy
        if version in chunk:
            chunk = chunk.replace(version, '${PV}')
        # For directories made from a part of the version
        elif version.startswith(chunk):
            full = split_version(version)
            part = split_version(chunk)

            for j in range(min(len(full), len(part))):
                if part[j] != full[j]:
                    break
                part[j] = '${%d}' % j

            chunk = join_version(part)
            chunk = chunk.replace('}$', '}.$')

        chunks[i] = chunk

    return prefix + "://" + "/".join(chunks)

def url_from_template(url, version):
    """Expand the ${PV} / ${N} placeholders of a template back into a
    concrete url for the given version."""
    components = split_version(version)

    url = url.replace('${PV}', version)
    for i in range(len(components)):
        url = url.replace('${%d}' % i, str(components[i]))

    return url
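
# A quick illustration of the two helpers above (comments only, not
# executed; the host is hypothetical):
#   template_from_url("http://example.com/1.2/foo-1.2.3.tar.gz", "1.2.3")
#     -> "http://example.com/${0}.${1}/foo-${PV}.tar.gz"
#   url_from_template("http://example.com/${0}.${1}/foo-${PV}.tar.gz", "1.2.4")
#     -> "http://example.com/1.2/foo-1.2.4.tar.gz"
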
# Stolen from distutils.LooseVersion
# Used for brute force to increment the version
def split_version(version):
    """Split a version string into a list of int and str components."""
    component_re = re.compile(r'(\d+ | [a-z]+ | \.)', re.VERBOSE)
    components = filter(lambda x: x and x != '.', component_re.split(version))
    for i in range(len(components)):
        try:
            components[i] = int(components[i])
        except ValueError:
            pass
    return components

def join_version(components):
    """Join components back into a version string, dotting consecutive
    numeric components."""
    version = ""
    for i in range(len(components)):
        version += str(components[i])
        if i >= len(components) - 1:
            break
        if type(components[i]) != str and type(components[i + 1]) != str:
            version += "."
    return version
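
# A quick illustration of the two helpers above (comments only, not
# executed):
#   split_version("1.2.3b") -> [1, 2, 3, 'b']
#   join_version([1, 2, 3, 'b']) -> "1.2.3b"
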
def increment_version(components, level):
    """Increment the component at the given level and reset every numeric
    component below it to 0."""
    n = len(components)

    if level > n - 1 or level < 0:
        raise Exception("invalid increment level %d for %r" % (level, components))

    for i in range(n, level + 1, -1):
        if type(components[i - 1]) == int:
            components[i - 1] = 0

    if type(components[level]) == int:
        components[level] += 1

    return components
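
# A quick illustration (comments only, not executed): bumping level 1
# resets everything below it:
#   increment_version([1, 2, 3], 1) -> [1, 3, 0]
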
def gen_versions(components, level):
    """Generate candidate version component lists by incrementing each of
    the last `level` components, `level` times each."""
    n = len(components)
    depth = level
    level = min(level, n)

    if not n:
        return []

    versions = []

    for i in range(n, n - level, -1):
        increment_version(components, i - 1)
        for j in range(depth):
            versions.append(list(components))
            increment_version(components, i - 1)

    return versions
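
# A quick illustration (comments only, not executed): with a brute-force
# level of 2, "1.2.3" yields the candidates
#   gen_versions([1, 2, 3], 2) -> [[1, 2, 4], [1, 2, 5], [1, 3, 0], [1, 4, 0]]
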
def tryurl(fileurl):
    """Check whether fileurl looks like a real downloadable file rather
    than an error page or an empty document."""
    result = False

    output.ebegin("Trying: " + fileurl)

    try:
        fp = urllib2.urlopen(fileurl, None, 5)
        headers = fp.info()

        basename = os.path.basename(fileurl)

        if 'Content-disposition' in headers and basename not in headers['Content-disposition']:
            result = False
        elif 'Content-Length' in headers and headers['Content-Length'] == '0':
            result = False
        elif 'text/html' in headers.get('Content-Type', ''):
            result = False
        else:
            result = True
    except Exception:
        result = False

    output.eend(errno.ENOENT if not result else 0)

    return result

def regex_from_template(template):
    """Turn a ${PV} / ${N} url template into a regular expression whose
    first group captures candidate versions."""
    template = re.escape(template)
    template = template.replace(r'\$\{', '${')
    template = template.replace(r'\}', '}')
    template = template.replace(r'}\.$', '}.$')
    template = re.sub(r'(\$\{\d+\}\.?)+', r'([\w\.\-]+?)', template)
    #template = re.sub(r'(\$\{\d+\}\.+)+', '(.+?)\.', template)
    #template = re.sub(r'(\$\{\d+\})+', '(.+?)', template)
    template = template.replace('${PV}', r'([\w\.\-]+?)')
    template = template + r'/?$'
    return template
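
# A quick illustration (comments only, not executed):
#   regex_from_template("foo-${PV}.tar.gz")
#     -> r'foo\-([\w\.\-]+?)\.tar\.gz/?$'
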
def basedir_from_template(template):
    """Return the part of the template before its first placeholder."""
    idx = template.find('${')
    if idx == -1:
        return template

    idx = template[0:idx].rfind('/')
    if idx == -1:
        return ""

    return template[0:idx]

def generate_scan_paths(url):
    """Split a url template into (static path, regex) scanning steps, one
    per templated component."""
    prefix, chunks = url.split('://')
    chunks = chunks.split('/')

    steps = []

    path = prefix + ":/"
    for chunk in chunks:
        if '${' in chunk:
            steps.append((path, regex_from_template(chunk)))
            path = ""
        else:
            path += "/"
            path += chunk
    return steps
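
# A quick illustration (comments only, not executed; regexes abbreviated):
#   generate_scan_paths("http://example.com/${0}.${1}/foo-${PV}.tar.gz")
#     -> [("http://example.com", <regex for "${0}.${1}">),
#         ("", <regex for "foo-${PV}.tar.gz">)]
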
def scan_directory_recursive(url, steps, vmin, vmax):
    """Walk one (path, regex) step, collect matching versions between vmin
    and vmax, and recurse into each matching sub-directory."""
    if not steps:
        return []

    url += steps[0][0]
    pattern = steps[0][1]

    steps = steps[1:]

    output.einfo("Scanning: %s" % url)

    try:
        fp = urllib2.urlopen(url, None, 5)
    except Exception, err:
        return []

    data = fp.read()

    results = []

    if re.search(r"<\s*a\s+[^>]*href", data):
        # Looks like an HTML index: extract the links.
        from BeautifulSoup import BeautifulSoup

        soup = BeautifulSoup(data)

        for link in soup.findAll('a'):
            href = link.get("href")
            if not href:
                continue
            if href.startswith(url):
                href = href.replace(url, "", 1)

            match = re.match(pattern, href)
            if match:
                results.append((match.group(1), match.group(0)))

    elif url.startswith('ftp://'):  # Probably an FTP server listing
        buf = StringIO.StringIO(data)
        for line in buf.readlines():
            line = line.replace("\n", "").replace("\r", "")
            match = re.search(pattern, line)
            if match:
                results.append((match.group(1), match.group(0)))

    versions = []

    for version, path in results:
        ver = parse_version(version)
        if vmin and ver <= vmin:
            continue
        if vmax and ver >= vmax:
            continue

        # Build the full url of the match before recursing.
        if not url.endswith('/') and not path.startswith('/'):
            path = url + '/' + path
        else:
            path = url + path

        versions.append((path, version))
        if steps:
            ret = scan_directory_recursive(path, steps, vmin, vmax)
            versions.extend(ret)
    return versions

def scan_directory(cpv, fileurl, limit=None):
    """Scan the upstream directory structure of fileurl for versions newer
    than the one in cpv."""
    # Ftp: list dir
    # Handle mirrors
    if not settings["scan-dir"]:
        return []

    catpkg, ver, rev = portage.pkgsplit(cpv)

    template = template_from_url(fileurl, ver)

    if '${' not in template:
        output.ewarn("Url doesn't seem to depend on the version: %s not found in %s"
                     % (ver, fileurl))
        return []
    else:
        output.einfo("Scanning: %s" % template)

    vmin = parse_version(ver)

    steps = generate_scan_paths(template)
    return scan_directory_recursive("", steps, vmin, limit)

def brute_force(cpv, fileurl, limit=None):
    """Guess new upstream versions by incrementing components of the
    current version and probing the resulting urls."""
    if not settings["brute-force"]:
        return []

    catpkg, ver, rev = portage.pkgsplit(cpv)

    components = split_version(ver)
    versions = gen_versions(components, settings["brute-force-level"])

    output.einfo("Generating versions from " + ver)

    if not versions:
        output.ewarn("Can't generate new versions from " + ver)
        return []

    template = template_from_url(fileurl, ver)

    if '${' not in template:
        output.ewarn("Url doesn't seem to depend on the version: %s not found in %s"
                     % (ver, fileurl))
        return []
    else:
        output.einfo("Brute forcing: %s" % template)

    result = []

    i = 0
    done = []
    while i < len(versions):
        components = versions[i]
        i += 1
        if tuple(components) in done:
            continue
        done.append(tuple(components))

        vstring = join_version(components)
        version = parse_version(vstring)

        if limit and version >= limit:
            continue

        url = url_from_template(template, vstring)

        if not tryurl(url):
            continue

        result.append([url, vstring])

        if settings["brute-force-crazy"]:
            # Found one: also explore versions derived from this hit.
            for v in gen_versions(components, settings["brute-force-level"]):
                if v not in versions and tuple(v) not in done:
                    versions.append(v)

        if settings["stop-when-found"]:
            break

    return result

def euscan(cpv, portdir):
    """Scan every SRC_URI file of cpv for new upstream versions and print
    the results."""
    catpkg, ver, rev = portage.pkgsplit(cpv)

    if portdir:
        portdb = portage.portdbapi(portdir)
    else:
        portdb = portage.portdbapi()

    src_uri, repo = portdb.aux_get(cpv, ['SRC_URI', 'repository'])

    metadata = {
        "EAPI"    : portage.settings["EAPI"],
        "SRC_URI" : src_uri,
    }
    use = frozenset(portage.settings["PORTAGE_USE"].split())
    try:
        alist = _parse_uri_map(cpv, metadata, use=use)
        aalist = _parse_uri_map(cpv, metadata)
    except InvalidDependString as e:
        print red("!!! %s" % str(e))
        print red("!!! Invalid SRC_URI for '%s'." % cpv)
        del e
        return

    if "mirror" in portage.settings.features:
        fetchme = aalist
    else:
        fetchme = alist

    versions = []

    for filename in fetchme:
        for fileurl in fetchme[filename]:
            if fileurl.startswith('mirror://'):
                output.eerror('mirror:// scheme not supported (%s)' % fileurl)
                continue

            # Try listing the upstream directory
            versions.extend(scan_directory(cpv, fileurl))

            if versions and settings['stop-when-found']:
                break

            # Try a manual bump (brute force)
            versions.extend(brute_force(cpv, fileurl))

            if versions and settings['stop-when-found']:
                break

        if versions and not settings["check-all-files"]:
            break

    # Deduplicate urls by version.
    newversions = {}

    for url, version in versions:
        if version in newversions and len(url) < len(newversions[version]):
            continue
        newversions[version] = url

    for version in newversions:
        print darkgreen("New Upstream Version: ") + green("%s" % version) + " %s" % newversions[version]
    return versions

class Metadata_XML(handler.ContentHandler):
    """SAX handler collecting herd, maintainers and long description from
    a metadata.xml file."""
    _inside_herd = "No"
    _inside_maintainer = "No"
    _inside_email = "No"
    _inside_longdescription = "No"

    def __init__(self):
        handler.ContentHandler.__init__(self)
        # Instance attributes so repeated parses don't share state.
        self._herd = []
        self._maintainers = []
        self._longdescription = ""

    def startElement(self, tag, attr):
        if tag == "herd":
            self._inside_herd = "Yes"
        if tag == "longdescription":
            self._inside_longdescription = "Yes"
        if tag == "maintainer":
            self._inside_maintainer = "Yes"
        if tag == "email":
            self._inside_email = "Yes"

    def endElement(self, tag):
        if tag == "herd":
            self._inside_herd = "No"
        if tag == "longdescription":
            self._inside_longdescription = "No"
        if tag == "maintainer":
            self._inside_maintainer = "No"
        if tag == "email":
            self._inside_email = "No"

    def characters(self, contents):
        if self._inside_herd == "Yes":
            self._herd.append(contents)

        if self._inside_longdescription == "Yes":
            self._longdescription = contents

        if self._inside_maintainer == "Yes" and self._inside_email == "Yes":
            self._maintainers.append(contents)


def check_metadata(cpv, portdir=None):
    """Print the maintainers of the package and the herd it belongs to,
    as recorded in its metadata.xml."""
    if not portdir:
        portdb = portage.portdbapi()
        repo, = portdb.aux_get(cpv, ['repository'])
        portdir = portdb.getRepositoryPath(repo)

    metadata_file = portdir + "/" + portage.pkgsplit(cpv)[0] + "/metadata.xml"

    if not os.path.exists(metadata_file):
        print darkgreen("Maintainer: ") + red("Error (Missing metadata.xml)")
        return 1

    parser = make_parser()
    handler = Metadata_XML()
    parser.setContentHandler(handler)
    parser.parse(metadata_file)

    if handler._herd:
        herds = ", ".join(handler._herd)
        print darkgreen("Herd: ") + herds
    else:
        print darkgreen("Herd: ") + red("Error (No Herd)")
        return 1

    if handler._maintainers:
        print darkgreen("Maintainer: ") + ", ".join(handler._maintainers)
    else:
        print darkgreen("Maintainer: ") + "none"

    if len(handler._longdescription) > 1:
        print darkgreen("Description: ") + handler._longdescription

    print darkgreen("Location: ") + os.path.normpath(portdir + "/" + portage.pkgsplit(cpv)[0])


def usage(code):
    """Print the usage information for this script and exit."""
    print green("euscan"), "(%s)" % __version__
    print
    print "Usage: euscan [ebuild|[package-cat/]package[-version]]"
    sys.exit(code)


# default color setup
if (not sys.stdout.isatty()) or (portage.settings["NOCOLOR"] in ["yes", "true"]):
    nocolor()


def fc(x, y):
    """Comparator sorting pairs in descending order of their first element."""
    return cmp(y[0], x[0])

def main():
    if len(sys.argv) < 2:
        usage(1)

    for pkg in sys.argv[1:]:
        #try:
        if pkg.endswith('.ebuild'):
            portdir = os.path.dirname(os.path.dirname(os.path.dirname(pkg)))
            # Rebuild "category/package-version" from the ebuild path.
            category = os.path.basename(os.path.dirname(os.path.dirname(pkg)))
            package_list = ["%s/%s" % (category, os.path.basename(pkg)[:-len('.ebuild')])]
        else:
            portdir = None
            package_list = portage.portdb.xmatch("match-all", pkg)

        for cpv in package_list:
            print darkgreen("Package: ") + cpv
            #check_metadata(cpv, portdir)
            euscan(cpv, portdir)
            print ""
        #except Exception, err:
        #    print red("Error: " + pkg + "\n")
        #    print err


if __name__ == '__main__':
    main()