euscan: confidence tweak in generic handler
Signed-off-by: volpino <fox91@anche.no>
This commit is contained in:
		@@ -6,7 +6,7 @@ import json
 | 
				
			|||||||
from euscan import helpers, output
 | 
					from euscan import helpers, output
 | 
				
			||||||
 | 
					
 | 
				
			||||||
HANDLER_NAME = "cpan"
 | 
					HANDLER_NAME = "cpan"
 | 
				
			||||||
CONFIDENCE = 100.0
 | 
					CONFIDENCE = 100
 | 
				
			||||||
PRIORITY = 90
 | 
					PRIORITY = 90
 | 
				
			||||||
 | 
					
 | 
				
			||||||
_cpan_package_name_re = re.compile("mirror://cpan/authors/.*/([^/.]*).*")
 | 
					_cpan_package_name_re = re.compile("mirror://cpan/authors/.*/([^/.]*).*")
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,7 +1,8 @@
 | 
				
			|||||||
from urlparse import urljoin
 | 
					from urlparse import urljoin, urlparse
 | 
				
			||||||
import urllib2
 | 
					import urllib2
 | 
				
			||||||
import re
 | 
					import re
 | 
				
			||||||
import StringIO
 | 
					import StringIO
 | 
				
			||||||
 | 
					import difflib
 | 
				
			||||||
 | 
					
 | 
				
			||||||
try:
 | 
					try:
 | 
				
			||||||
    from BeautifulSoup import BeautifulSoup
 | 
					    from BeautifulSoup import BeautifulSoup
 | 
				
			||||||
@@ -14,11 +15,40 @@ from euscan import CONFIG, SCANDIR_BLACKLIST_URLS, \
 | 
				
			|||||||
    BRUTEFORCE_BLACKLIST_PACKAGES, BRUTEFORCE_BLACKLIST_URLS, output, helpers
 | 
					    BRUTEFORCE_BLACKLIST_PACKAGES, BRUTEFORCE_BLACKLIST_URLS, output, helpers
 | 
				
			||||||
 | 
					
 | 
				
			||||||
HANDLER_NAME = "generic"
 | 
					HANDLER_NAME = "generic"
 | 
				
			||||||
CONFIDENCE = 50.0
 | 
					CONFIDENCE = 45
 | 
				
			||||||
PRIORITY = 0
 | 
					PRIORITY = 0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
BRUTEFORCE_HANDLER_NAME = "brute_force"
 | 
					BRUTEFORCE_HANDLER_NAME = "brute_force"
 | 
				
			||||||
BRUTEFORCE_CONFIDENCE = 30.0
 | 
					BRUTEFORCE_CONFIDENCE = 30
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def confidence_score(found, original, minimum=CONFIDENCE):
					    """Score how closely the URL *found* resembles the URL *original*.

					    Both URLs are parsed with ``urlparse``.  If they differ in host
					    (``netloc``) or in the number of ``/``-separated path components,
					    *minimum* is returned unchanged.  Otherwise digits and dots are
					    removed from both paths, the leading characters they share are
					    dropped, and the ``difflib.SequenceMatcher`` similarity ratio of the
					    remainders scales the result between *minimum* and ``2 * minimum``.

					    :param found: candidate URL to be scored.
					    :param original: reference URL the candidate is compared against.
					    :param minimum: lowest score to return; defaults to ``CONFIDENCE``.
					    :returns: *minimum* itself on a host or depth mismatch, otherwise
					        ``int(minimum + minimum * ratio)``.
					    """
					    found_p = urlparse(found)
					    original_p = urlparse(original)

					    # A different host gives no reason to trust the candidate any further.
					    if found_p.netloc != original_p.netloc:
					        return minimum

					    # Same reasoning when the directory depth differs.
					    if found_p.path.count("/") != original_p.path.count("/"):
					        return minimum

					    # Strip runs of digits and dots so version numbers do not influence
					    # the comparison.  The class deliberately omits "+": inside [...] it
					    # is a literal plus sign, and removing it would mangle names such as
					    # "gtk+".
					    found_path = re.sub(r"[\d.]+", "", found_p.path)
					    original_path = re.sub(r"[\d.]+", "", original_p.path)

					    # Drop the leading part both paths share.  zip() stops at the shorter
					    # string, so a path that is a strict prefix of the other cannot cause
					    # an out-of-range index.
					    common = 0
					    for found_char, original_char in zip(found_path, original_path):
					        if found_char != original_char:
					            break
					        common += 1
					    found_path = found_path[common:]
					    original_path = original_path[common:]

					    # ratio() is 1.0 for identical remainders (including two empty
					    # strings) and 0.0 when they have nothing in common.
					    diff = difflib.SequenceMatcher(None, found_path, original_path).ratio()
					    return int(minimum + minimum * diff)  # maximum score is minimum * 2
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def scan_html(data, url, pattern):
 | 
					def scan_html(data, url, pattern):
 | 
				
			||||||
@@ -98,7 +128,8 @@ def scan_directory_recursive(cp, ver, rev, url, steps, orig_url):
 | 
				
			|||||||
        path = urljoin(url, path)
 | 
					        path = urljoin(url, path)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if not steps and path not in orig_url:
 | 
					        if not steps and path not in orig_url:
 | 
				
			||||||
            versions.append((path, pv, HANDLER_NAME, CONFIDENCE))
 | 
					            confidence = confidence_score(path, orig_url)
 | 
				
			||||||
 | 
					            versions.append((path, pv, HANDLER_NAME, confidence))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if steps:
 | 
					        if steps:
 | 
				
			||||||
            ret = scan_directory_recursive(cp, ver, rev, path, steps, orig_url)
 | 
					            ret = scan_directory_recursive(cp, ver, rev, path, steps, orig_url)
 | 
				
			||||||
@@ -209,14 +240,14 @@ def brute_force(pkg, url):
 | 
				
			|||||||
        if helpers.version_filtered(cp, ver, version):
 | 
					        if helpers.version_filtered(cp, ver, version):
 | 
				
			||||||
            continue
 | 
					            continue
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        url = helpers.url_from_template(template, version)
 | 
					        try_url = helpers.url_from_template(template, version)
 | 
				
			||||||
        infos = helpers.tryurl(url, template)
 | 
					        infos = helpers.tryurl(try_url, template)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if not infos:
 | 
					        if not infos:
 | 
				
			||||||
            continue
 | 
					            continue
 | 
				
			||||||
 | 
					        confidence = confidence_score(try_url, url,
 | 
				
			||||||
        result.append([url, version, BRUTEFORCE_HANDLER_NAME,
 | 
					                                      minimum=BRUTEFORCE_CONFIDENCE)
 | 
				
			||||||
                       BRUTEFORCE_CONFIDENCE])
 | 
					        result.append([try_url, version, BRUTEFORCE_HANDLER_NAME, confidence])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if len(result) > CONFIG['brute-force-false-watermark']:
 | 
					        if len(result) > CONFIG['brute-force-false-watermark']:
 | 
				
			||||||
            output.einfo(
 | 
					            output.einfo(
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -7,7 +7,7 @@ import portage
 | 
				
			|||||||
from euscan import helpers, output
 | 
					from euscan import helpers, output
 | 
				
			||||||
 | 
					
 | 
				
			||||||
HANDLER_NAME = "github"
 | 
					HANDLER_NAME = "github"
 | 
				
			||||||
CONFIDENCE = 100.0
 | 
					CONFIDENCE = 100
 | 
				
			||||||
PRIORITY = 90
 | 
					PRIORITY = 90
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -6,7 +6,7 @@ import xml.dom.minidom
 | 
				
			|||||||
from euscan import helpers, output
 | 
					from euscan import helpers, output
 | 
				
			||||||
 | 
					
 | 
				
			||||||
HANDLER_NAME = "php"
 | 
					HANDLER_NAME = "php"
 | 
				
			||||||
CONFIDENCE = 100.0
 | 
					CONFIDENCE = 100
 | 
				
			||||||
PRIORITY = 90
 | 
					PRIORITY = 90
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -6,7 +6,7 @@ import portage
 | 
				
			|||||||
from euscan import helpers, output
 | 
					from euscan import helpers, output
 | 
				
			||||||
 | 
					
 | 
				
			||||||
HANDLER_NAME = "pypi"
 | 
					HANDLER_NAME = "pypi"
 | 
				
			||||||
CONFIDENCE = 100.0
 | 
					CONFIDENCE = 100
 | 
				
			||||||
PRIORITY = 90
 | 
					PRIORITY = 90
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -6,7 +6,7 @@ import urllib2
 | 
				
			|||||||
from euscan import helpers, output
 | 
					from euscan import helpers, output
 | 
				
			||||||
 | 
					
 | 
				
			||||||
HANDLER_NAME = "rubygems"
 | 
					HANDLER_NAME = "rubygems"
 | 
				
			||||||
CONFIDENCE = 100.0
 | 
					CONFIDENCE = 100
 | 
				
			||||||
PRIORITY = 90
 | 
					PRIORITY = 90
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user