euscan: added simple support for result confidence

Confidence values are meaningless at the moment and still need to be fixed.

Signed-off-by: volpino <fox91@anche.no>
This commit is contained in:
volpino 2012-05-23 16:44:44 +02:00
parent a18083bd98
commit 1efeee4134
9 changed files with 29 additions and 15 deletions

View File

@ -266,8 +266,8 @@ def main():
if ret is not None: if ret is not None:
if len(ret) > 0: if len(ret) > 0:
for cp, url, version, handler in ret: for cp, url, version, handler, confidence in ret:
output.result(cp, version, url, handler) output.result(cp, version, url, handler, confidence)
elif not CONFIG['quiet']: elif not CONFIG['quiet']:
output.ewarn( output.ewarn(
"Didn't find any new version, check package's homepage " + "Didn't find any new version, check package's homepage " +

View File

@ -130,11 +130,12 @@ class EuscanOutput(object):
else: else:
raise TypeError("Invalid output format") raise TypeError("Invalid output format")
def result(self, cp, version, url, handler): def result(self, cp, version, url, handler, confidence):
if self.config['format']: if self.config['format']:
_curr = self.queries[self.current_query] _curr = self.queries[self.current_query]
_curr["result"].append( _curr["result"].append(
{"version": version, "urls": [url], "handler": handler} {"version": version, "urls": [url], "handler": handler,
"confidence": confidence}
) )
else: else:
if not self.config['quiet']: if not self.config['quiet']:

View File

@ -6,6 +6,7 @@ import json
from euscan import helpers, output from euscan import helpers, output
HANDLER_NAME = "cpan" HANDLER_NAME = "cpan"
CONFIDENCE = 100.0
_cpan_package_name_re = re.compile("mirror://cpan/authors/.*/([^/.]*).*") _cpan_package_name_re = re.compile("mirror://cpan/authors/.*/([^/.]*).*")
@ -126,7 +127,7 @@ def scan(cpv, url):
if url == orig_url: if url == orig_url:
continue continue
ret.append((url, pv, HANDLER_NAME)) ret.append((url, pv, HANDLER_NAME, CONFIDENCE))
return ret return ret

View File

@ -10,7 +10,10 @@ from euscan import CONFIG, SCANDIR_BLACKLIST_URLS, \
BRUTEFORCE_BLACKLIST_PACKAGES, BRUTEFORCE_BLACKLIST_URLS, output, helpers BRUTEFORCE_BLACKLIST_PACKAGES, BRUTEFORCE_BLACKLIST_URLS, output, helpers
HANDLER_NAME = "generic" HANDLER_NAME = "generic"
CONFIDENCE = 50.0
BRUTEFORCE_HANDLER_NAME = "brute_force" BRUTEFORCE_HANDLER_NAME = "brute_force"
BRUTEFORCE_CONFIDENCE = 30.0
def scan_html(data, url, pattern): def scan_html(data, url, pattern):
@ -88,7 +91,7 @@ def scan_directory_recursive(cp, ver, rev, url, steps, orig_url):
path = url + path path = url + path
if not steps and path not in orig_url: if not steps and path not in orig_url:
versions.append((path, pv, HANDLER_NAME)) versions.append((path, pv, HANDLER_NAME, CONFIDENCE))
if steps: if steps:
ret = scan_directory_recursive(cp, ver, rev, path, steps, orig_url) ret = scan_directory_recursive(cp, ver, rev, path, steps, orig_url)
@ -198,7 +201,8 @@ def brute_force(cpv, url):
if not infos: if not infos:
continue continue
result.append([url, version, BRUTEFORCE_HANDLER_NAME]) result.append([url, version, BRUTEFORCE_HANDLER_NAME,
BRUTEFORCE_CONFIDENCE])
if len(result) > CONFIG['brute-force-false-watermark']: if len(result) > CONFIG['brute-force-false-watermark']:
output.einfo( output.einfo(

View File

@ -12,10 +12,10 @@ def can_handle(cpv, url):
def clean_results(results): def clean_results(results):
ret = [] ret = []
for path, version in results: for path, version, confidence in results:
if version == '5SUMS': if version == '5SUMS':
continue continue
ret.append((path, version, HANDLER_NAME)) ret.append((path, version, HANDLER_NAME, confidence))
return ret return ret

View File

@ -6,6 +6,7 @@ import xml.dom.minidom
from euscan import helpers, output from euscan import helpers, output
HANDLER_NAME = "php" HANDLER_NAME = "php"
CONFIDENCE = 100.0
def can_handle(cpv, url): def can_handle(cpv, url):
@ -65,7 +66,7 @@ def scan(cpv, url):
if url == orig_url: if url == orig_url:
continue continue
ret.append((url, pv, HANDLER_NAME)) ret.append((url, pv, HANDLER_NAME, CONFIDENCE))
return ret return ret

View File

@ -6,6 +6,7 @@ import portage
from euscan import helpers, output from euscan import helpers, output
HANDLER_NAME = "pypi" HANDLER_NAME = "pypi"
CONFIDENCE = 100.0
def can_handle(cpv, url): def can_handle(cpv, url):
@ -47,7 +48,7 @@ def scan(cpv, url):
continue continue
urls = client.release_urls(package, up_pv) urls = client.release_urls(package, up_pv)
urls = " ".join([infos['url'] for infos in urls]) urls = " ".join([infos['url'] for infos in urls])
ret.append((urls, pv, HANDLER_NAME)) ret.append((urls, pv, HANDLER_NAME, CONFIDENCE))
return ret return ret

View File

@ -6,6 +6,7 @@ import urllib2
from euscan import helpers, output from euscan import helpers, output
HANDLER_NAME = "rubygem" HANDLER_NAME = "rubygem"
CONFIDENCE = 100.0
def can_handle(cpv, url): def can_handle(cpv, url):
@ -66,7 +67,7 @@ def scan(cpv, url):
if helpers.version_filtered(cp, ver, pv): if helpers.version_filtered(cp, ver, pv):
continue continue
url = 'http://rubygems.org/gems/%s-%s.gem' % (gem, up_pv) url = 'http://rubygems.org/gems/%s-%s.gem' % (gem, up_pv)
ret.append((url, pv, HANDLER_NAME)) ret.append((url, pv, HANDLER_NAME, CONFIDENCE))
return ret return ret

View File

@ -15,7 +15,7 @@ from euscan import handlers, helpers, output
def filter_versions(cp, versions): def filter_versions(cp, versions):
filtered = {} filtered = {}
for url, version, handler in versions: for url, version, handler, confidence in versions:
# Try to keep the most specific urls (determined by the length) # Try to keep the most specific urls (determined by the length)
if version in filtered and len(url) < len(filtered[version]): if version in filtered and len(url) < len(filtered[version]):
@ -25,10 +25,15 @@ def filter_versions(cp, versions):
if helpers.version_blacklisted(cp, version): if helpers.version_blacklisted(cp, version):
continue continue
filtered[version] = {"url": url, "handler": handler} filtered[version] = {
"url": url,
"handler": handler,
"confidence": confidence
}
return [ return [
(cp, filtered[version]["url"], version, filtered[version]["handler"]) (cp, filtered[version]["url"], version, filtered[version]["handler"],
filtered[version]["confidence"])
for version in filtered for version in filtered
] ]