euscanwww: Fixed broken tasks

Signed-off-by: volpino <fox91@anche.no>
This commit is contained in:
volpino 2012-06-29 17:25:45 +02:00
parent e9c03fbded
commit fc7f929038
7 changed files with 98 additions and 53 deletions

View File

@ -137,7 +137,7 @@ class ScanMetadata(object):
def scan_metadata(packages=None, logger=None): def scan_metadata(packages=None, logger=None):
scan_handler = ScanMetadata(logger=logger) scan_handler = ScanMetadata(logger=logger)
if packages is None: if not packages:
packages = Package.objects.all() packages = Package.objects.all()
for pkg in packages: for pkg in packages:

View File

@ -15,11 +15,10 @@ from djeuscan.models import Package, Version, VersionLog
class ScanPortage(object): class ScanPortage(object):
def __init__(self, logger=None, no_log=False, purge_packages=False, def __init__(self, logger=None, no_log=False, purge_packages=False,
purge_versions=False, kill_versions=False): kill_versions=False):
self.logger = logger or FakeLogger() self.logger = logger or FakeLogger()
self.no_log = no_log self.no_log = no_log
self.purge_packages = purge_packages self.purge_packages = purge_packages
self.purge_versions = purge_versions
self.kill_versions = kill_versions self.kill_versions = kill_versions
self.style = color_style() self.style = color_style()
@ -84,8 +83,13 @@ class ScanPortage(object):
return self._overlays return self._overlays
@commit_on_success
def scan(self, query=None): def scan(self, query=None):
if self.purge_packages:
with commit_on_success():
for package in Package.objects.all():
self.logger.info('- [p] %s' % (package))
package.delete()
cmd = ['eix', '--xml', '--pure-packages', '-x'] cmd = ['eix', '--xml', '--pure-packages', '-x']
if query: if query:
cmd.extend(['--exact', query]) cmd.extend(['--exact', query])
@ -97,7 +101,6 @@ class ScanPortage(object):
output = subprocess.Popen(cmd, stdout=subprocess.PIPE).\ output = subprocess.Popen(cmd, stdout=subprocess.PIPE).\
communicate()[0] communicate()[0]
output = output.strip().strip('\n')
if len(output) == 0: if len(output) == 0:
if not query: if not query:
@ -124,25 +127,27 @@ class ScanPortage(object):
cat = category_tag.getAttribute("name") cat = category_tag.getAttribute("name")
pkg = package_tag.getAttribute("name") pkg = package_tag.getAttribute("name")
homepage_tags = package_tag.getElementsByTagName("homepage") homepage_tags = package_tag.getElementsByTagName("homepage")
homepage = homepage_tags[0].firstChild.nodeValue \ try:
if homepage_tags else "" homepage = homepage_tags[0].firstChild.nodeValue
except (IndexError, AttributeError):
homepage = ""
desc_tags = package_tag.getElementsByTagName("description") desc_tags = package_tag.getElementsByTagName("description")
desc = desc_tags[0].firstChild.nodeValue if desc_tags else "" try:
desc = desc_tags[0].firstChild.nodeValue
except (IndexError, AttributeError):
desc = ""
with commit_on_success():
package = self.store_package(cat, pkg, homepage, desc) package = self.store_package(cat, pkg, homepage, desc)
for version_tag in package_tag.getElementsByTagName("version"): for version_tag in package_tag.\
getElementsByTagName("version"):
cpv = "%s/%s-%s" % (cat, pkg, cpv = "%s/%s-%s" % (cat, pkg,
version_tag.getAttribute("id")) version_tag.getAttribute("id"))
slot = version_tag.getAttribute("slot") slot = version_tag.getAttribute("slot")
overlay = version_tag.getAttribute("overlay") overlay = version_tag.getAttribute("overlay")
self.store_version(package, cpv, slot, overlay) self.store_version(package, cpv, slot, overlay)
if self.purge_packages and not query:
for package in Package.objects.all():
self.logger.info('- [p] %s' % (package))
package.delete()
def store_package(self, cat, pkg, homepage, description): def store_package(self, cat, pkg, homepage, description):
created = False created = False
obj = self.cache_get_package(cat, pkg) obj = self.cache_get_package(cat, pkg)
@ -232,7 +237,7 @@ class ScanPortage(object):
@commit_on_success @commit_on_success
def purge_versions(logger=None, nolog=False): def do_purge_versions(logger=None, no_log=False):
logger = logger or FakeLogger() logger = logger or FakeLogger()
# For each dead version # For each dead version
@ -246,7 +251,7 @@ def purge_versions(logger=None, nolog=False):
logger.info('- [v] %s' % (version)) logger.info('- [v] %s' % (version))
if nolog: if no_log:
continue continue
VersionLog.objects.create( VersionLog.objects.create(
@ -274,7 +279,6 @@ def scan_portage(packages=None, no_log=False, purge_packages=False,
logger=logger, logger=logger,
no_log=no_log, no_log=no_log,
purge_packages=purge_packages, purge_packages=purge_packages,
purge_versions=purge_versions,
kill_versions=kill_versions, kill_versions=kill_versions,
) )
@ -288,16 +292,16 @@ def scan_portage(packages=None, no_log=False, purge_packages=False,
scan_handler.cache_store_version(version) scan_handler.cache_store_version(version)
logger.info('done') logger.info('done')
if packages is None: if not packages:
scan_handler.scan() scan_handler.scan()
else: else:
for pkg in packages: for pkg in packages:
if isinstance(pkg, Package): if isinstance(pkg, Package):
scan_handler.scan('%s/%s' % (pkg.category, pkg.name), pkg) scan_handler.scan('%s/%s' % (pkg.category, pkg.name))
else: else:
scan_handler.scan(pkg) scan_handler.scan(pkg)
if purge_versions: if purge_versions:
purge_versions(logger=logger, no_log=no_log) do_purge_versions(logger=logger, no_log=no_log)
logger.info('Done.') logger.info('Done.')
return True return True

View File

@ -16,6 +16,7 @@ class ScanUpstream(object):
def scan(self, package): def scan(self, package):
CONFIG["format"] = "dict" CONFIG["format"] = "dict"
output.clean()
output.set_query(package) output.set_query(package)
euscan_scan_upstream(package) euscan_scan_upstream(package)
@ -34,7 +35,14 @@ class ScanUpstream(object):
obj = self.store_package(cpv) obj = self.store_package(cpv)
for res in out[package]["result"]: for res in out[package]["result"]:
self.store_version(obj, res["version"], " ".join(res["urls"])) self.store_version(
obj,
res["version"],
" ".join(res["urls"]),
res["type"],
res["handler"],
res["confidence"],
)
self.store_result(obj, out_json, scan_time, ebuild) self.store_result(obj, out_json, scan_time, ebuild)
@ -66,10 +74,17 @@ class ScanUpstream(object):
return obj return obj
def store_version(self, package, ver, url): def store_version(self, package, ver, url, version_type, handler,
confidence):
obj, created = Version.objects.get_or_create( obj, created = Version.objects.get_or_create(
package=package, slot='', revision='r0', version=ver, overlay='', package=package,
defaults={"alive": True, "urls": url, "packaged": False} slot='',
revision='r0',
version=ver,
overlay='',
defaults={"alive": True, "urls": url, "packaged": False,
"version_type": version_type, "handler": handler,
"confidence": confidence}
) )
if not created: if not created:
obj.alive = True obj.alive = True
@ -97,7 +112,7 @@ class ScanUpstream(object):
@commit_on_success @commit_on_success
def purge_versions(logger=None): def do_purge_versions(logger=None):
logger = logger or FakeLogger() logger = logger or FakeLogger()
# For each dead version # For each dead version
@ -126,17 +141,21 @@ def scan_upstream(packages=None, purge_versions=False,
logger.info('Scanning upstream...') logger.info('Scanning upstream...')
if packages is None: if not packages:
packages = Package.objects.all() packages = Package.objects.all()
result = True
for pkg in packages: for pkg in packages:
if isinstance(pkg, Package): if isinstance(pkg, Package):
result = scan_handler.scan('%s/%s' % (pkg.category, pkg.name)) curr = scan_handler.scan('%s/%s' % (pkg.category, pkg.name))
else: else:
result = scan_handler.scan(pkg) curr = scan_handler.scan(pkg)
if not curr:
result = False
if purge_versions: if purge_versions:
purge_versions(logger=logger) do_purge_versions(logger=logger)
logger.info('Done.') logger.info('Done.')
return result return result

View File

@ -58,7 +58,7 @@ def _run_in_chunks(task, packages, kwargs=None,
job = TaskSet(tasks=tasks) job = TaskSet(tasks=tasks)
result = job.apply_async() result = job.apply_async()
# TODO: understand why this causes timeout # TODO: understand why this causes timeout
output.extend(list(result.join(timeout=3600))) output.extend(list(result.join()))
return output return output
@ -102,7 +102,7 @@ def scan_metadata_list_task(query):
Runs a parallel metadata scan for packages in the query list (space Runs a parallel metadata scan for packages in the query list (space
separated string). Task used only from the web interface. separated string). Task used only from the web interface.
""" """
_run_in_chunks(_scan_metadata_task, [p for p in query.split()]) return _run_in_chunks(_scan_metadata_task, [p for p in query.split()])
@task @task
@ -110,7 +110,7 @@ def scan_metadata_all_task():
""" """
Runs a parallel metadata scan for all packages Runs a parallel metadata scan for all packages
""" """
_run_in_chunks(_scan_metadata_task, Package.objects.all()) return _run_in_chunks(_scan_metadata_task, Package.objects.all())
@task @task
@ -120,8 +120,11 @@ def _scan_portage_task(packages, no_log=False, purge_packages=False,
Scans portage for the given set of packages Scans portage for the given set of packages
""" """
logger = _scan_portage_task.get_logger() logger = _scan_portage_task.get_logger()
if packages:
logger.info("Starting portage scanning subtask for %d packages...", logger.info("Starting portage scanning subtask for %d packages...",
len(packages)) len(packages))
else:
logger.info("Starting portage scanning for all packages...")
result = scan_portage( result = scan_portage(
packages=packages, packages=packages,
@ -145,18 +148,23 @@ def scan_portage_list_task(query, no_log=False, purge_packages=False,
""" """
kwargs = {"no_log": no_log, "purge_packages": purge_packages, kwargs = {"no_log": no_log, "purge_packages": purge_packages,
"purge_versions": purge_versions, "prefetch": prefetch} "purge_versions": purge_versions, "prefetch": prefetch}
_run_in_chunks(_scan_portage_task, [p for p in query.split()], kwargs) return _run_in_chunks(_scan_portage_task, [p for p in query.split()],
kwargs)
@task @task
def scan_portage_all_task(no_log=False, purge_packages=False, def scan_portage_all_task(no_log=False, purge_packages=False,
purge_versions=False, prefetch=False): purge_versions=False, prefetch=False):
""" """
Runs a parallel portage scan for all packages Runs a synchronous portage scan for all packages
""" """
kwargs = {"no_log": no_log, "purge_packages": purge_packages, return _scan_portage_task(
"purge_versions": purge_versions, "prefetch": prefetch} packages=None,
_run_in_chunks(_scan_metadata_task, Package.objects.all(), kwargs) no_log=no_log,
purge_packages=purge_packages,
purge_versions=purge_versions,
prefetch=prefetch,
)
@task @task
@ -187,7 +195,8 @@ def scan_upstream_list_task(query, purge_versions=False):
""" """
kwargs = {"purge_versions": purge_versions} kwargs = {"purge_versions": purge_versions}
_run_in_chunks(_scan_upstream_task, [p for p in query.split()], kwargs) return _run_in_chunks(_scan_upstream_task, [p for p in query.split()],
kwargs)
@task @task
@ -196,7 +205,7 @@ def scan_upstream_all_task(purge_versions=False):
Runs a parallel upstream scan for all packages Runs a parallel upstream scan for all packages
""" """
kwargs = {"purge_versions": purge_versions} kwargs = {"purge_versions": purge_versions}
_run_in_chunks(_scan_upstream_task, Package.objects.all(), kwargs) return _run_in_chunks(_scan_upstream_task, Package.objects.all(), kwargs)
@task @task

View File

@ -235,7 +235,7 @@ CELERY_RESULT_BACKEND = "amqp"
BROKER_CONNECTION_TIMEOUT = 3600 BROKER_CONNECTION_TIMEOUT = 3600
CELERYD_CONCURRENCY = 4 CELERYD_CONCURRENCY = 4
TASKS_CONCURRENTLY = 8 TASKS_CONCURRENTLY = 4
TASKS_SUBTASK_PACKAGES = 32 TASKS_SUBTASK_PACKAGES = 32
# LDAP authentication # LDAP authentication

View File

@ -67,12 +67,20 @@ class EOutputMem(EOutput):
self.out = StringIO() self.out = StringIO()
def getvalue(self): def getvalue(self):
return re.sub("\033\[[0-9;]+m", "", self.out.getvalue()) return clean_colors(self.out.getvalue())
def _write(self, f, msg): def _write(self, f, msg):
super(EOutputMem, self)._write(self.out, msg) super(EOutputMem, self)._write(self.out, msg)
def clean_colors(string):
if type(string) is str:
string = re.sub("\033\[[0-9;]+m", "", string)
string = re.sub(r"\\u001b\[[0-9;]+m", "", string)
string = re.sub(r"\x1b\[[0-9;]+m", "", string)
return string
class EuscanOutput(object): class EuscanOutput(object):
""" """
Class that handles output for euscan Class that handles output for euscan
@ -141,7 +149,8 @@ class EuscanOutput(object):
def metadata(self, key, value, show=True): def metadata(self, key, value, show=True):
if self.config["format"]: if self.config["format"]:
self.queries[self.current_query]["metadata"][key] = value self.queries[self.current_query]["metadata"][key] = \
clean_colors(value)
elif show: elif show:
print "%s: %s" % (key.capitalize(), value) print "%s: %s" % (key.capitalize(), value)

View File

@ -45,9 +45,13 @@ def filter_versions(cp, versions):
def scan_upstream_urls(cpv, urls, on_progress): def scan_upstream_urls(cpv, urls, on_progress):
versions = [] versions = []
if on_progress:
progress_available = 70 progress_available = 70
num_urls = sum([len(urls[fn]) for fn in urls]) num_urls = sum([len(urls[fn]) for fn in urls])
if num_urls > 0:
progress_increment = progress_available / num_urls progress_increment = progress_available / num_urls
else:
progress_increment = 0
for filename in urls: for filename in urls:
for url in urls[filename]: for url in urls[filename]: