This commit is contained in:
volpino 2012-07-02 17:45:31 +02:00
commit 5a2f2e9eab
10 changed files with 239 additions and 167 deletions

View File

@ -192,7 +192,7 @@ def parse_args():
getopt_options['short']['global'] = "hVCqv1bf:pm" getopt_options['short']['global'] = "hVCqv1bf:pm"
getopt_options['long']['global'] = [ getopt_options['long']['global'] = [
"help", "version", "nocolor", "quiet", "verbose", "oneshot", "help", "version", "nocolor", "quiet", "verbose", "oneshot",
"brute-force=", "format=" "brute-force=", "format=", "progress"
] ]
short_opts = getopt_options['short']['global'] short_opts = getopt_options['short']['global']
@ -245,7 +245,7 @@ def main():
if CONFIG['verbose'] > 2: if CONFIG['verbose'] > 2:
HTTPConnection.debuglevel = 1 HTTPConnection.debuglevel = 1
if not CONFIG["format"]: if not CONFIG["format"] and not CONFIG['quiet']:
CONFIG["progress"] = False CONFIG["progress"] = False
on_progress = None on_progress = None

View File

@ -19,6 +19,11 @@ class Command(BaseCommand):
dest='all', dest='all',
default=False, default=False,
help='Scan all packages'), help='Scan all packages'),
make_option('--category',
action='store',
dest='category',
default=None,
help='Scan only this category'),
) )
args = '<package package ...>' args = '<package package ...>'
help = 'Scans metadata and fills database' help = 'Scans metadata and fills database'
@ -26,7 +31,7 @@ class Command(BaseCommand):
def handle(self, *args, **options): def handle(self, *args, **options):
set_verbosity_level(logger, options.get("verbosity", 1)) set_verbosity_level(logger, options.get("verbosity", 1))
if options['all']: if options['all'] or options['category']:
packages = None packages = None
elif len(args): elif len(args):
@ -34,4 +39,8 @@ class Command(BaseCommand):
else: else:
packages = [pkg[:-1] for pkg in sys.stdin.readlines()] packages = [pkg[:-1] for pkg in sys.stdin.readlines()]
scan_metadata(packages=packages, logger=logger) scan_metadata(
packages=packages,
category=options['category'],
logger=logger
)

View File

@ -19,6 +19,11 @@ class Command(BaseCommand):
dest='all', dest='all',
default=False, default=False,
help='Scan all packages'), help='Scan all packages'),
make_option('--category',
action='store',
dest='category',
default=None,
help='Scan only this category'),
make_option('--purge-packages', make_option('--purge-packages',
action='store_true', action='store_true',
dest='purge-packages', dest='purge-packages',
@ -47,7 +52,7 @@ class Command(BaseCommand):
def handle(self, *args, **options): def handle(self, *args, **options):
set_verbosity_level(logger, options.get("verbosity", 1)) set_verbosity_level(logger, options.get("verbosity", 1))
if options['all']: if options['all'] or options['category']:
packages = None packages = None
elif len(args): elif len(args):
packages = [pkg for pkg in args] packages = [pkg for pkg in args]
@ -56,6 +61,7 @@ class Command(BaseCommand):
scan_portage( scan_portage(
packages=packages, packages=packages,
category=options['category'],
no_log=options["no-log"], no_log=options["no-log"],
purge_packages=options["purge-packages"], purge_packages=options["purge-packages"],
purge_versions=options["purge-versions"], purge_versions=options["purge-versions"],

View File

@ -11,7 +11,7 @@ class Migration(SchemaMigration):
# Adding model 'RefreshPackageQuery' # Adding model 'RefreshPackageQuery'
db.create_table('djeuscan_refreshpackagequery', ( db.create_table('djeuscan_refreshpackagequery', (
('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)), ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
('query', self.gf('django.db.models.fields.CharField')(unique=True, max_length=256)), ('query', self.gf('django.db.models.fields.CharField')(unique=True, max_length=255)),
('priority', self.gf('django.db.models.fields.IntegerField')(default=0)), ('priority', self.gf('django.db.models.fields.IntegerField')(default=0)),
)) ))
db.send_create_signal('djeuscan', ['RefreshPackageQuery']) db.send_create_signal('djeuscan', ['RefreshPackageQuery'])
@ -104,7 +104,7 @@ class Migration(SchemaMigration):
'Meta': {'object_name': 'RefreshPackageQuery'}, 'Meta': {'object_name': 'RefreshPackageQuery'},
'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
'priority': ('django.db.models.fields.IntegerField', [], {'default': '0'}), 'priority': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
'query': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '256'}) 'query': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '255'})
}, },
'djeuscan.version': { 'djeuscan.version': {
'Meta': {'unique_together': "(['package', 'slot', 'revision', 'version', 'overlay'],)", 'object_name': 'Version'}, 'Meta': {'unique_together': "(['package', 'slot', 'revision', 'version', 'overlay'],)", 'object_name': 'Version'},

View File

@ -269,7 +269,7 @@ class MaintainerLog(Log):
class RefreshPackageQuery(models.Model): class RefreshPackageQuery(models.Model):
query = models.CharField(max_length=256, unique=True) query = models.CharField(max_length=255, unique=True)
priority = models.IntegerField(default=0) priority = models.IntegerField(default=0)
def __unicode__(self): def __unicode__(self):

View File

@ -1,3 +1,5 @@
import os.path
from gentoolkit.query import Query from gentoolkit.query import Query
from gentoolkit.errors import GentoolkitFatalError from gentoolkit.errors import GentoolkitFatalError
@ -8,17 +10,19 @@ from django.core.exceptions import ValidationError
from djeuscan.models import Package, Herd, Maintainer from djeuscan.models import Package, Herd, Maintainer
from djeuscan.processing import FakeLogger from djeuscan.processing import FakeLogger
class ScanMetadata(object): class ScanMetadata(object):
def __init__(self, logger=None): def __init__(self, logger=None):
self.style = color_style() self.style = color_style()
self.logger = logger or FakeLogger() self.logger = logger or FakeLogger()
@commit_on_success
def scan(self, query=None, obj=None): def scan(self, query=None, obj=None):
matches = Query(query).find( matches = Query(query).smart_find(
include_masked=True, in_installed=True,
in_installed=False, in_porttree=True,
in_overlay=True,
include_masked=True,
show_progress=False,
no_matches_fatal=False,
) )
if not matches: if not matches:
@ -39,65 +43,52 @@ class ScanMetadata(object):
else: else:
created = False created = False
try:
obj.homepage = pkg.environment("HOMEPAGE")
obj.description = pkg.environment("DESCRIPTION")
except GentoolkitFatalError, err:
self.logger.error(
self.style.ERROR(
"Gentoolkit fatal error: '%s'" % str(err)
)
)
if created: if created:
self.logger.info('+ [p] %s/%s' % (pkg.category, pkg.name)) self.logger.info('+ [p] %s/%s' % (pkg.category, pkg.name))
if pkg.metadata: if not pkg.metadata:
herds = dict( return
[(herd[0], herd) for herd in pkg.metadata.herds(True)]
)
maintainers = dict(
[(m.email, m) for m in pkg.metadata.maintainers()]
)
existing_herds = [h.herd for h in obj.herds.all()] herds = dict(
new_herds = set(herds.keys()).difference(existing_herds) [(herd[0], herd) for herd in pkg.metadata.herds(True)]
old_herds = set(existing_herds).difference(herds.keys()) )
maintainers = dict(
[(m.email, m) for m in pkg.metadata.maintainers()]
)
existing_maintainers = [m.email for m in obj.maintainers.all()] existing_herds = [h.herd for h in obj.herds.all()]
new_maintainers = set( new_herds = set(herds.keys()).difference(existing_herds)
maintainers.keys()).difference(existing_maintainers old_herds = set(existing_herds).difference(herds.keys())
)
old_maintainers = set(
existing_maintainers).difference(maintainers.keys()
)
for herd in obj.herds.all(): existing_maintainers = [m.email for m in obj.maintainers.all()]
if herd.herd in old_herds: new_maintainers = set(maintainers.keys()).difference(existing_maintainers)
obj.herds.remove(herd) old_maintainers = set(existing_maintainers).difference(maintainers.keys())
for herd in new_herds: for herd in obj.herds.all():
herd = self.store_herd(*herds[herd]) if herd.herd in old_herds:
obj.herds.add(herd) obj.herds.remove(herd)
for maintainer in obj.maintainers.all(): for herd in new_herds:
if maintainer.email in old_maintainers: herd = self.store_herd(*herds[herd])
obj.maintainers.remove(maintainer) obj.herds.add(herd)
for maintainer in new_maintainers: for maintainer in obj.maintainers.all():
maintainer = maintainers[maintainer] if maintainer.email in old_maintainers:
try: obj.maintainers.remove(maintainer)
maintainer = self.store_maintainer(
maintainer.name, maintainer.email for maintainer in new_maintainers:
maintainer = maintainers[maintainer]
try:
maintainer = self.store_maintainer(
maintainer.name, maintainer.email
) )
obj.maintainers.add(maintainer) obj.maintainers.add(maintainer)
except ValidationError: except ValidationError:
self.logger.error( self.logger.error(
self.style.ERROR("Bad maintainer: '%s' '%s'" % \ self.style.ERROR("Bad maintainer: '%s' '%s'" % \
(maintainer.name, maintainer.email)) (maintainer.name, maintainer.email))
) )
obj.save() obj.save()
return True
def store_herd(self, name, email): def store_herd(self, name, email):
if not name: if not name:
@ -134,15 +125,18 @@ class ScanMetadata(object):
) )
return maintainer return maintainer
@commit_on_success
def scan_metadata(packages=None, logger=None): def scan_metadata(packages=None, category=None, logger=None):
scan_handler = ScanMetadata(logger=logger) scan_handler = ScanMetadata(logger=logger)
if not packages:
if category:
packages = Package.objects.filter(category=category)
elif not packages:
packages = Package.objects.all() packages = Package.objects.all()
for pkg in packages: for pkg in packages:
if isinstance(pkg, Package): if isinstance(pkg, Package):
result = scan_handler.scan('%s/%s' % (pkg.category, pkg.name), pkg) scan_handler.scan('%s/%s' % (pkg.category, pkg.name), pkg)
else: else:
result = scan_handler.scan(pkg) scan_handler.scan(pkg)
return result

View File

@ -56,11 +56,78 @@ class ScanPortage(object):
) )
self._cache['versions'][key] = version self._cache['versions'][key] = version
def scan(self, query=None): def scan_eix_xml(self, query, category=None):
cmd = ['eix', '--xml'] cmd = ['eix', '--xml']
if query: if query:
cmd.extend(['--exact', query]) cmd.extend(['--exact', query])
if category:
cmd.extend(['-C', category])
sub = subprocess.Popen(cmd, stdout=subprocess.PIPE)
output = sub.stdout
try:
parser = iterparse(output, ["start", "end"])
parser.next() # read root tag just for testing output
except ParseError:
if query:
msg = "Unknown package '%s'" % query
else:
msg = "No packages."
self.logger.error(self.style.ERROR(msg))
return
package = {'versions' : []}
category = ""
for event, elem in parser:
if event == "start": # on tag opening
if elem.tag == "category":
category = elem.attrib["name"]
elif elem.tag == "package":
package["package"] = elem.attrib["name"]
package["category"] = category
elif elem.tag in ["description", "homepage"]:
package[elem.tag] = elem.text or ""
elif elem.tag == "version":
# append version data to versions
cpv = "%s/%s-%s" % \
(package["category"], package["package"], elem.attrib["id"])
slot = elem.attrib.get("slot", "")
overlay = elem.attrib.get("repository", "gentoo")
package["versions"].append((cpv, slot, overlay))
elif event == "end": # on tag closing
if elem.tag == "package":
# clean old data
yield package
package = {"versions" : []}
if elem.tag == "category":
# clean old data
category = ""
elem.clear()
def prepare_purge_versions(self, packages, query=None, category=None):
    """Flag the packaged versions in scope as dead before a scan.

    Does nothing unless ``self.purge_versions`` is set.

    Set all versions dead, then set found versions alive and delete old
    versions: this method only performs the first step (``alive=False``);
    the later steps are handled by other methods of the scanner.

    packages -- iterable of Package objects; only used when ``query`` is
                given, in which case each package's versions are updated.
    query    -- package query being scanned, or a false value for a bulk
                scan (--all or --category).
    category -- optional category name restricting the bulk update.
    """
    if not self.purge_versions:
        return

    if query:
        # Targeted scan: only touch the versions of the matched packages.
        for package in packages:
            Version.objects.filter(
                package=package, packaged=True
            ).update(alive=False)
        return

    # Optimisation for --all or --category: one bulk UPDATE.
    self.logger.info('Killing existing versions...')
    qs = Version.objects.filter(packaged=True)
    if category:
        # filter() returns a *new* queryset; it must be rebound, otherwise
        # the category restriction is dropped and every packaged version
        # in the database is marked dead.
        qs = qs.filter(package__category=category)
    qs.update(alive=False)
    self.logger.info('done')
def scan(self, query=None, category=None):
if not query: if not query:
current_packages = Package.objects.all() current_packages = Package.objects.all()
elif '/' in query: elif '/' in query:
@ -68,78 +135,22 @@ class ScanPortage(object):
current_packages = Package.objects.filter(category=cat, name=pkg) current_packages = Package.objects.filter(category=cat, name=pkg)
else: else:
current_packages = Package.objects.filter(name=query) current_packages = Package.objects.filter(name=query)
if category:
current_packages = current_packages.filter(category=category)
if self.purge_versions: self.prepare_purge_versions(current_packages, query, category)
if not query:
self.logger.info('Killing existing versions...')
Version.objects.filter(packaged=True).update(alive=False)
self.logger.info('done')
else:
for package in current_packages:
Version.objects.filter(package=package, packaged=True).\
update(alive=False)
sub = subprocess.Popen(cmd, stdout=subprocess.PIPE) packages_alive = set()
output = sub.stdout for data in self.scan_eix_xml(query, category):
cat, pkg = data['category'], data['package']
package = self.store_package(cat, pkg, data['homepage'], data['description'])
packages_alive.add("%s/%s" % (cat, pkg))
for cpv, slot, overlay in data['versions']:
self.store_version(package, cpv, slot, overlay)
try: self.purge_old_packages(current_packages, packages_alive)
parser = iterparse(output, ["start", "end"]) self.purge_old_versions()
parser.next() # read root tag just for testing output
except ParseError:
self.logger.error(
self.style.ERROR(
"Unknown package '%s'" % query
)
)
else:
cat, pkg, homepage, desc = ("", "", "", "")
versions = []
packages_alive = set()
for event, elem in parser:
if event == "start": # on tag opening
if elem.tag == "category":
cat = elem.attrib["name"]
if elem.tag == "package":
pkg = elem.attrib["name"]
if elem.tag == "description":
desc = elem.text or ""
if elem.tag == "homepage":
homepage = elem.text or ""
if elem.tag == "version":
# append version data to versions
cpv = "%s/%s-%s" % (cat, pkg, elem.attrib["id"])
slot = elem.attrib.get("slot", "")
overlay = elem.attrib.get("overlay", "")
versions.append((cpv, slot, overlay))
elif event == "end": # on tag closing
if elem.tag == "package":
# package tag has been closed, saving everything!
package = self.store_package(cat, pkg, homepage,
desc)
packages_alive.add('%s/%s' % (cat, pkg))
for cpv, slot, overlay in versions:
self.store_version(package, cpv, slot, overlay)
# clean old data
pkg, homepage, desc = ("", "", "")
versions = []
if elem.tag == "category":
# clean old data
cat = ""
elem.clear()
if self.purge_packages:
for package in current_packages:
cp = "%s/%s" % (package.category, package.name)
if cp not in packages_alive:
self.logger.info('- [p] %s' % (package))
package.delete()
if self.purge_versions:
self.purge_old_versions(current_packages)
def store_package(self, cat, pkg, homepage, description): def store_package(self, cat, pkg, homepage, description):
created = False created = False
@ -157,14 +168,6 @@ class ScanPortage(object):
if created: if created:
self.logger.info('+ [p] %s/%s' % (cat, pkg)) self.logger.info('+ [p] %s/%s' % (cat, pkg))
# Set all versions dead, then set found versions alive and
# delete old versions
if not self.purge_versions:
Version.objects.filter(
package=obj,
packaged=True
).update(alive=False)
return obj return obj
def store_version(self, package, cpv, slot, overlay): def store_version(self, package, cpv, slot, overlay):
@ -223,18 +226,21 @@ class ScanPortage(object):
overlay=obj.overlay overlay=obj.overlay
) )
def purge_old_versions(self, packages): def purge_old_packages(self, packages, alive):
# For each dead versions if not self.purge_packages:
if packages: return
versions = []
for package in packages:
qs = Version.objects.filter(package=package, packaged=True,
alive=False)
for version in qs:
versions.append(version)
else:
versions = Version.objects.filter(packaged=True, alive=False)
for package in packages:
cp = "%s/%s" % (package.category, package.name)
if cp not in alive:
self.logger.info('- [p] %s' % (package))
package.delete()
def purge_old_versions(self):
if not self.purge_versions:
return
versions = Version.objects.filter(packaged=True, alive=False)
for version in versions: for version in versions:
if version.overlay == 'gentoo': if version.overlay == 'gentoo':
version.package.n_packaged -= 1 version.package.n_packaged -= 1
@ -257,11 +263,11 @@ class ScanPortage(object):
overlay=version.overlay overlay=version.overlay
) )
Version.objects.filter(packaged=True, alive=False).delete() versions.delete()
@commit_on_success @commit_on_success
def scan_portage(packages=None, no_log=False, purge_packages=False, def scan_portage(packages=None, category=None, no_log=False, purge_packages=False,
purge_versions=False, prefetch=False, logger=None): purge_versions=False, prefetch=False, logger=None):
logger = logger or FakeLogger() logger = logger or FakeLogger()
@ -287,7 +293,7 @@ def scan_portage(packages=None, no_log=False, purge_packages=False,
logger.info('done') logger.info('done')
if not packages: if not packages:
scan_handler.scan() scan_handler.scan(category=category)
else: else:
for pkg in packages: for pkg in packages:
if isinstance(pkg, Package): if isinstance(pkg, Package):
@ -296,4 +302,4 @@ def scan_portage(packages=None, no_log=False, purge_packages=False,
scan_handler.scan(pkg) scan_handler.scan(pkg)
logger.info('Done.') logger.info('Done.')
return True

View File

@ -1,8 +1,6 @@
import os import os
import re import re
import pkg_resources
import errno import errno
import urllib2 import urllib2
import portage import portage
@ -15,10 +13,9 @@ except ImportError:
import robotparser import robotparser
import urlparse import urlparse
from euscan import CONFIG, BLACKLIST_VERSIONS, ROBOTS_TXT_BLACKLIST_DOMAINS
import euscan import euscan
from euscan import CONFIG, BLACKLIST_VERSIONS, ROBOTS_TXT_BLACKLIST_DOMAINS
from euscan.version import parse_version
def htop_vercmp(a, b): def htop_vercmp(a, b):
def fixver(v): def fixver(v):
@ -182,8 +179,8 @@ def simple_vercmp(a, b):
return r return r
# Fallback # Fallback
a = pkg_resources.parse_version(a) a = pkg_parse_version(a)
b = pkg_resources.parse_version(b) b = pkg_parse_version(b)
if a < b: if a < b:
return -1 return -1
@ -198,8 +195,8 @@ def vercmp(package, a, b):
def version_is_nightly(a, b): def version_is_nightly(a, b):
a = pkg_resources.parse_version(a) a = parse_version(a)
b = pkg_resources.parse_version(b) b = parse_version(b)
''' Try to skip nightly builds when not wanted (www-apps/moodle) ''' ''' Try to skip nightly builds when not wanted (www-apps/moodle) '''
if len(a) != len(b) and len(b) == 2 and len(b[0]) == len('yyyymmdd'): if len(a) != len(b) and len(b) == 2 and len(b[0]) == len('yyyymmdd'):

60
pym/euscan/version.py Normal file
View File

@ -0,0 +1,60 @@
import re
# Borrowed from pkg_resources; the code is copied here because importing
# pkg_resources itself is not a good idea.

# Splits a version string into digit runs, lowercase letter runs, dots and
# dashes (re.VERBOSE makes the spaces inside the pattern insignificant).
component_re = re.compile(r'(\d+ | [a-z]+ | \.| -)', re.VERBOSE)

# replace(part, default): maps release-tag aliases onto the token used for
# sorting, falling back to ``default`` for anything not listed.
replace = {
    'pre': 'c',
    'preview': 'c',
    '-': 'final-',
    'rc': 'c',
    'dev': '@',
}.get
def _parse_version_parts(s):
    """Yield the sortable tokens of version string ``s``.

    The string is split with ``component_re`` and each piece is passed
    through the ``replace`` alias table.  Empty pieces and dots are skipped,
    pieces starting with a digit are zero-padded to 8 characters, and every
    other piece is prefixed with '*'.  A final '*final' token is always
    emitted last.
    """
    for token in component_re.split(s):
        token = replace(token, token)
        if token and token != '.':
            if token[0] in '0123456789':
                # pad so that numbers compare correctly as strings
                yield token.zfill(8)
            else:
                yield '*' + token

    # terminator: makes alpha/beta/candidate sort before the final release
    yield '*final'
def parse_version(s):
    """Convert a version string to a chronologically-sortable key

    The result is a tuple of strings built from the lower-cased input.
    Numeric components are zero-padded to 8 digits so that they compare
    numerically as plain strings; dots are dropped; every non-numeric
    component is prefixed with '*'; and the key always ends with '*final'.

    Trailing zero components in front of an alphabetic tag, a dash or the
    end of the version are removed, so "2.4.0" yields the same key as "2.4".

    A dash becomes the token '*final-', which sorts after '*final' but
    before any number: "2.4-1" is therefore treated as a patch level of
    "2.4", newer than "2.4" yet older than "2.4.1".

    Alphabetic tags that sort before "final" ("a", "b", "c", "alpha",
    "beta", "candidate", ...) denote pre-releases, so "2.4a1" is older than
    "2.4"; a dash directly in front of such a tag is discarded.

    The aliases "pre", "preview" and "rc" are handled as "c" (release
    candidate), and "dev" becomes '@' so that it sorts lower than any other
    pre-release tag.

    Pathological versioning schemes can still fool this parser, but they
    should be very rare in practice.
    """
    key = []

    def drop_trailing(value):
        # pop ``value`` off the end of the key for as long as it is there
        while key and key[-1] == value:
            key.pop()

    for token in _parse_version_parts(s.lower()):
        if token.startswith('*'):
            if token < '*final':
                # remove '-' before a prerelease tag
                drop_trailing('*final-')
            # remove trailing zeros from each series of numeric parts
            drop_trailing('00000000')
        key.append(token)

    return tuple(key)