Change source layout

In preparation for the PEP 517 transition.

Signed-off-by: Alfred Wingate <parona@protonmail.com>
This commit is contained in:
276
src/euscan/handlers/generic.py
Normal file
276
src/euscan/handlers/generic.py
Normal file
@ -0,0 +1,276 @@
|
||||
from urllib.parse import urljoin, urlparse
|
||||
import urllib.request, urllib.error, urllib.parse
|
||||
import re
|
||||
import io
|
||||
import difflib
|
||||
|
||||
try:
|
||||
from BeautifulSoup import BeautifulSoup
|
||||
except ImportError:
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
import portage
|
||||
|
||||
from euscan import output, helpers, mangling, CONFIG, SCANDIR_BLACKLIST_URLS, \
|
||||
BRUTEFORCE_BLACKLIST_PACKAGES, BRUTEFORCE_BLACKLIST_URLS
|
||||
|
||||
# Identity and ranking metadata for the euscan handler registry.
HANDLER_NAME = "generic"
# Baseline confidence for versions found by directory scanning;
# confidence_score() scales results into [CONFIDENCE, 2 * CONFIDENCE].
CONFIDENCE = 45
PRIORITY = 0

# Identity and (lower) baseline confidence used when versions are
# guessed by brute force rather than discovered in a listing.
BRUTEFORCE_HANDLER_NAME = "brute_force"
BRUTEFORCE_CONFIDENCE = 30
|
||||
|
||||
|
||||
def confidence_score(found, original, minimum=CONFIDENCE):
    """Score how closely the *found* URL resembles the *original* URL.

    Returns an int in ``[minimum, 2 * minimum]``: ``minimum`` when the
    URLs clearly differ (different host or directory depth), scaled up
    towards ``2 * minimum`` the more the version-stripped paths match.
    """
    found_p = urlparse(found)
    original_p = urlparse(original)

    # Different hosts: no structural similarity worth measuring.
    if found_p.netloc != original_p.netloc:
        return minimum

    # Different directory depth: treat as unrelated layouts.
    if len(found_p.path.split("/")) != len(original_p.path.split("/")):
        return minimum

    # Strip digits, dots and '+' so version numbers don't skew the ratio.
    found_path = re.sub(r"[\d+\.]?", "", found_p.path)
    original_path = re.sub(r"[\d+\.]?", "", original_p.path)

    # Drop the common leading part of both paths.
    # Bound the walk by the SHORTER string: the original code used
    # len(found_path) only, so original_path[i] raised IndexError
    # whenever the stripped original path was a prefix of the found one.
    i = 0
    max_i = min(len(found_path), len(original_path))
    while i < max_i and found_path[i] == original_path[i]:
        i += 1
    found_path = found_path[i:]
    original_path = original_path[i:]

    # Similarity of the remaining tails drives the final score.
    diff = difflib.SequenceMatcher(None, found_path, original_path).ratio()
    return int(minimum + minimum * diff)  # maximum score is minimum * 2
|
||||
|
||||
|
||||
def scan_html(data, url, pattern):
    """Extract version candidates from the anchors of an HTML index page.

    Returns a list of ``(version, matched_href)`` tuples, one for every
    link whose href matches *pattern* (case-insensitively).  The version
    string is the dot-join of the pattern's non-None capture groups.
    """
    soup = BeautifulSoup(data, features="lxml")

    found = []
    for anchor in soup.findAll('a'):
        href = anchor.get("href")
        if not href:
            continue

        # Turn absolute hrefs into paths relative to the scanned URL.
        if href.startswith(url):
            href = href.replace(url, "", 1)

        match = re.search(pattern, href, re.I)
        if not match:
            continue

        groups = [g for g in match.groups() if g is not None]
        found.append((".".join(groups), match.group(0)))

    return found
|
||||
|
||||
|
||||
def scan_ftp(data, url, pattern):
    """Extract version candidates from an FTP directory listing.

    *data* is the raw listing, one entry per line; both ``str`` and
    ``bytes`` are accepted.  Returns a list of ``(version, matched_text)``
    tuples for every line matching *pattern* (case-insensitively).
    *url* is unused but kept for interface parity with scan_html.
    """
    # Bug fix: callers pass fp.read() bytes (Python 3), and
    # io.StringIO(bytes) raises TypeError — decode first.
    if isinstance(data, bytes):
        data = data.decode("utf-8", errors="replace")

    buf = io.StringIO(data)
    results = []

    for line in buf.readlines():
        line = line.replace("\n", "").replace("\r", "")
        match = re.search(pattern, line, re.I)
        if match:
            results.append(
                (".".join([x for x in match.groups() if x is not None]),
                 match.group(0))
            )
    return results
|
||||
|
||||
|
||||
def scan_directory_recursive(cp, ver, rev, url, steps, orig_url, options):
    """Walk one level of the scan-path template and recurse into matches.

    *steps* is a list of ``(path_component, regex)`` pairs produced by
    ``helpers.generate_scan_paths``; the first step is appended to *url*
    and its regex is matched against the fetched listing.  Returns a list
    of ``(url, version, handler_name, confidence)`` tuples for the final
    step, recursing through intermediate directory levels.
    """
    if not steps:
        return []

    url += steps[0][0]
    pattern = steps[0][1]

    steps = steps[1:]

    output.einfo("Scanning: %s" % url)

    # Best-effort fetch: any network failure simply ends this branch.
    try:
        fp = helpers.urlopen(url)
    except urllib.error.URLError:
        return []
    except IOError:
        return []

    if not fp:
        return []

    data = fp.read()

    results = []

    # Raw bytes pattern (rb"..."): the original non-raw b"\s" escapes are
    # invalid escape sequences (DeprecationWarning, future SyntaxError).
    if re.search(rb"<\s*a\s+[^>]*href", data, re.I):
        results.extend(scan_html(data, url, pattern))
    elif url.startswith('ftp://'):
        results.extend(scan_ftp(data, url, pattern))

    versions = []

    for up_pv, path in results:
        pv = mangling.mangle_version(up_pv, options)

        if helpers.version_filtered(cp, ver, pv):
            continue
        if not url.endswith("/"):
            url = url + "/"
        path = urljoin(url, path)

        # Leaf step: record a hit unless it is part of the original URL.
        if not steps and path not in orig_url:
            confidence = confidence_score(path, orig_url)
            path = mangling.mangle_url(path, options)
            versions.append((path, pv, HANDLER_NAME, confidence))

        # Intermediate step: descend into the matched directory.
        if steps:
            ret = scan_directory_recursive(cp, ver, rev, path, steps,
                                           orig_url, options)
            versions.extend(ret)

    return versions
|
||||
|
||||
|
||||
def scan_url(pkg, url, options):
    """Find upstream versions for *pkg* starting from its SRC_URI *url*.

    Scans the upstream directory tree when ``CONFIG["scan-dir"]`` is
    enabled, and falls back to brute-forcing version numbers when the
    scan finds nothing.  Returns a list of
    ``(url, version, handler_name, confidence)`` results.
    """
    # Bug fix: ret was only assigned inside the scan-dir branch, so
    # `if not ret` raised UnboundLocalError when scan-dir was disabled.
    ret = []

    if CONFIG["scan-dir"]:
        for bu in SCANDIR_BLACKLIST_URLS:
            if re.match(bu, url):
                output.einfo("%s is blacklisted by rule %s" % (url, bu))
                return []

        resolved_url = helpers.parse_mirror(url)
        if not resolved_url:
            return []

        cp, ver, rev = portage.pkgsplit(pkg.cpv)

        # 'Hack' for _beta/_rc versions where _ is used instead of -
        if ver not in resolved_url:
            newver = helpers.version_change_end_sep(ver)
            if newver and newver in resolved_url:
                output.einfo(
                    "Version: using %s instead of %s" % (newver, ver)
                )
                ver = newver

        template = helpers.template_from_url(resolved_url, ver)
        if '${' not in template:
            output.einfo(
                "Url doesn't seems to depend on version: %s not found in %s" %
                (ver, resolved_url)
            )
            return []
        else:
            output.einfo("Scanning: %s" % template)

        steps = helpers.generate_scan_paths(template)
        ret = scan_directory_recursive(cp, ver, rev, "", steps, url, options)

    if not ret:
        ret = brute_force(pkg, url)

    return ret
|
||||
|
||||
|
||||
def brute_force(pkg, url):
    """Guess new upstream versions by mutating the current version.

    Generates candidate versions from the components of *pkg*'s current
    version, substitutes each into the URL template derived from *url*,
    and probes the resulting URLs.  Returns a list of
    ``[url, version, handler_name, confidence]`` entries.
    """
    if CONFIG["brute-force"] == 0:
        return []

    cp, ver, rev = portage.pkgsplit(pkg.cpv)

    url = helpers.parse_mirror(url)
    if not url:
        return []

    for bp in BRUTEFORCE_BLACKLIST_PACKAGES:
        if re.match(bp, cp):
            output.einfo("%s is blacklisted by rule %s" % (cp, bp))
            return []

    for bp in BRUTEFORCE_BLACKLIST_URLS:
        if re.match(bp, url):
            output.einfo("%s is blacklisted by rule %s" % (cp, bp))
            return []

    output.einfo("Generating version from " + ver)

    components = helpers.split_version(ver)
    versions = helpers.gen_versions(components, CONFIG["brute-force"])

    # Keep only candidates strictly newer than the current version.
    # Bug fix: the original removed items from `versions` while
    # iterating it, silently skipping the element after each removal.
    versions = [v for v in versions
                if helpers.vercmp(cp, ver, helpers.join_version(v)) < 0]

    if not versions:
        output.einfo("Can't generate new versions from " + ver)
        return []

    template = helpers.template_from_url(url, ver)

    if '${PV}' not in template:
        output.einfo(
            "Url doesn't seems to depend on full version: %s not found in %s" %
            (ver, url))
        return []
    else:
        output.einfo("Brute forcing: %s" % template)

    result = []

    i = 0
    done = set()  # tuples of component lists already probed

    # `versions` may grow while we walk it (recursive brute force),
    # hence the index loop instead of a plain for.
    while i < len(versions):
        components = versions[i]
        i += 1
        # Bug fix: compare as tuples.  `components` is a list and `done`
        # holds tuples, so the original `components in done` never
        # matched and duplicates were re-probed.
        if tuple(components) in done:
            continue
        done.add(tuple(components))

        version = helpers.join_version(components)

        if helpers.version_filtered(cp, ver, version):
            continue

        try_url = helpers.url_from_template(template, version)
        infos = helpers.tryurl(try_url, template)

        if not infos:
            continue
        confidence = confidence_score(try_url, url,
                                      minimum=BRUTEFORCE_CONFIDENCE)
        result.append([try_url, version, BRUTEFORCE_HANDLER_NAME, confidence])

        # Too many hits means the server answers 200 for everything.
        if len(result) > CONFIG['brute-force-false-watermark']:
            output.einfo(
                "Broken server detected ! Skipping brute force."
            )
            return []

        if CONFIG["brute-force-recursive"]:
            for v in helpers.gen_versions(list(components),
                                          CONFIG["brute-force"]):
                if v not in versions and tuple(v) not in done:
                    versions.append(v)

        if CONFIG["oneshot"]:
            break

    return result
|
||||
|
||||
|
||||
def can_handle(pkg, url):
    """Catch-all predicate: the generic handler accepts any package/URL."""
    return True
|
Reference in New Issue
Block a user