2011-08-31 15:38:32 +02:00
|
|
|
import urllib2
|
|
|
|
import re
|
|
|
|
import StringIO
|
|
|
|
|
|
|
|
from BeautifulSoup import BeautifulSoup
|
|
|
|
|
|
|
|
import portage
|
|
|
|
|
|
|
|
from euscan import CONFIG, SCANDIR_BLACKLIST_URLS, BRUTEFORCE_BLACKLIST_PACKAGES, BRUTEFORCE_BLACKLIST_URLS, output
|
|
|
|
from euscan import helpers
|
|
|
|
|
|
|
|
def scan_html(data, url, pattern):
|
|
|
|
soup = BeautifulSoup(data)
|
|
|
|
results = []
|
|
|
|
|
|
|
|
for link in soup.findAll('a'):
|
|
|
|
href = link.get("href")
|
|
|
|
if not href:
|
|
|
|
continue
|
|
|
|
if href.startswith(url):
|
|
|
|
href = href.replace(url, "", 1)
|
|
|
|
|
|
|
|
match = re.match(pattern, href, re.I)
|
|
|
|
if match:
|
|
|
|
results.append((match.group(1), match.group(0)))
|
|
|
|
|
|
|
|
return results
|
|
|
|
|
|
|
|
def scan_ftp(data, url, pattern):
|
|
|
|
buf = StringIO.StringIO(data)
|
|
|
|
results = []
|
|
|
|
|
|
|
|
for line in buf.readlines():
|
|
|
|
line = line.replace("\n", "").replace("\r", "")
|
|
|
|
match = re.search(pattern, line, re.I)
|
|
|
|
if match:
|
|
|
|
results.append((match.group(1), match.group(0)))
|
|
|
|
|
|
|
|
return results
|
|
|
|
|
|
|
|
def scan_directory_recursive(cpv, url, steps):
|
|
|
|
if not steps:
|
|
|
|
return []
|
|
|
|
|
|
|
|
cp, ver, rev = portage.pkgsplit(cpv)
|
|
|
|
url += steps[0][0]
|
|
|
|
pattern = steps[0][1]
|
|
|
|
|
|
|
|
steps = steps[1:]
|
|
|
|
|
|
|
|
output.einfo("Scanning: %s" % url)
|
|
|
|
|
|
|
|
try:
|
|
|
|
fp = helpers.urlopen(url)
|
|
|
|
except urllib2.URLError:
|
|
|
|
return []
|
|
|
|
except IOError:
|
|
|
|
return []
|
|
|
|
|
2011-09-06 16:32:29 +02:00
|
|
|
if not fp:
|
|
|
|
return []
|
|
|
|
|
2011-08-31 15:38:32 +02:00
|
|
|
data = fp.read()
|
|
|
|
|
|
|
|
results = []
|
|
|
|
|
|
|
|
if re.search("<\s*a\s+[^>]*href", data):
|
|
|
|
results.extend(scan_html(data, url, pattern))
|
|
|
|
elif url.startswith('ftp://'):
|
|
|
|
results.extend(scan_ftp(data, url, pattern))
|
|
|
|
|
|
|
|
versions = []
|
|
|
|
|
|
|
|
for version, path in results:
|
|
|
|
if helpers.version_filtered(cp, ver, version):
|
|
|
|
continue
|
|
|
|
|
|
|
|
if not url.endswith('/') and not path.startswith('/'):
|
|
|
|
path = url + '/' + path
|
|
|
|
else:
|
|
|
|
path = url + path
|
|
|
|
|
|
|
|
versions.append((path, version))
|
|
|
|
|
|
|
|
if steps:
|
|
|
|
ret = scan_directory_recursive(cpv, path, steps)
|
|
|
|
versions.extend(ret)
|
|
|
|
|
|
|
|
return versions
|
|
|
|
|
|
|
|
def scan(cpv, url):
|
|
|
|
for bu in SCANDIR_BLACKLIST_URLS:
|
|
|
|
if re.match(bu, url):
|
|
|
|
output.einfo("%s is blacklisted by rule %s" % (url, bu))
|
|
|
|
return []
|
|
|
|
|
|
|
|
resolved_url = helpers.parse_mirror(url)
|
|
|
|
|
|
|
|
cp, ver, rev = portage.pkgsplit(cpv)
|
|
|
|
|
|
|
|
template = helpers.template_from_url(resolved_url, ver)
|
|
|
|
if '${' not in template:
|
|
|
|
output.einfo("Url doesn't seems to depend on version: %s not found in %s"
|
|
|
|
% (ver, resolved_url))
|
|
|
|
return []
|
|
|
|
else:
|
|
|
|
output.einfo("Scanning: %s" % template)
|
|
|
|
|
|
|
|
steps = helpers.generate_scan_paths(template)
|
|
|
|
return scan_directory_recursive(cpv, "", steps)
|
|
|
|
|
|
|
|
def brute_force(cpv, url):
|
|
|
|
cp, ver, rev = portage.pkgsplit(cpv)
|
|
|
|
|
|
|
|
url = helpers.parse_mirror(url)
|
|
|
|
|
|
|
|
for bp in BRUTEFORCE_BLACKLIST_PACKAGES:
|
|
|
|
if re.match(bp, cp):
|
|
|
|
output.einfo("%s is blacklisted by rule %s" % (cp, bp))
|
|
|
|
return []
|
|
|
|
|
|
|
|
for bp in BRUTEFORCE_BLACKLIST_URLS:
|
|
|
|
if re.match(bp, url):
|
|
|
|
output.einfo("%s is blacklisted by rule %s" % (cp, bp))
|
|
|
|
return []
|
|
|
|
|
|
|
|
output.einfo("Generating version from " + ver)
|
|
|
|
|
|
|
|
components = helpers.split_version(ver)
|
|
|
|
versions = helpers.gen_versions(components, CONFIG["brute-force"])
|
|
|
|
|
|
|
|
""" Remove unwanted versions """
|
|
|
|
for v in versions:
|
|
|
|
if helpers.vercmp(cp, ver, helpers.join_version(v)) >= 0:
|
|
|
|
versions.remove(v)
|
|
|
|
|
|
|
|
if not versions:
|
|
|
|
output.einfo("Can't generate new versions from " + ver)
|
|
|
|
return []
|
|
|
|
|
|
|
|
template = helpers.template_from_url(url, ver)
|
|
|
|
|
|
|
|
if '${PV}' not in template:
|
|
|
|
output.einfo("Url doesn't seems to depend on full version: %s not found in %s"
|
|
|
|
% (ver, url))
|
|
|
|
return []
|
|
|
|
else:
|
|
|
|
output.einfo("Brute forcing: %s" % template)
|
|
|
|
|
|
|
|
result = []
|
|
|
|
|
|
|
|
i = 0
|
|
|
|
done = []
|
|
|
|
|
|
|
|
while i < len(versions):
|
|
|
|
components = versions[i]
|
|
|
|
i += 1
|
|
|
|
if components in done:
|
|
|
|
continue
|
|
|
|
done.append(tuple(components))
|
|
|
|
|
|
|
|
version = helpers.join_version(components)
|
|
|
|
|
|
|
|
if helpers.version_filtered(cp, ver, version):
|
|
|
|
continue
|
|
|
|
|
|
|
|
url = helpers.url_from_template(template, version)
|
|
|
|
infos = helpers.tryurl(url, template)
|
|
|
|
|
|
|
|
if not infos:
|
|
|
|
continue
|
|
|
|
|
|
|
|
result.append([url, version])
|
|
|
|
|
|
|
|
if CONFIG["brute-force-recursive"]:
|
2011-09-06 15:47:00 +02:00
|
|
|
for v in helpers.gen_versions(list(components), CONFIG["brute-force"]):
|
2011-08-31 15:38:32 +02:00
|
|
|
if v not in versions and tuple(v) not in done:
|
|
|
|
versions.append(v)
|
|
|
|
|
|
|
|
if CONFIG["oneshot"]:
|
|
|
|
break
|
|
|
|
|
|
|
|
return result
|
|
|
|
|
|
|
|
def can_handle(cpv, url):
|
|
|
|
return True
|