Compare commits
16 Commits
9465c14342
...
master
Author | SHA1 | Date | |
---|---|---|---|
5b9d44fee1 | |||
d8d1767766 | |||
fbd7a4e139 | |||
a7ff66ae04 | |||
5da26b0719 | |||
656f8e155e | |||
294dcc2a9c | |||
c628edc26b | |||
61cbb8e3f9 | |||
b2cd013b09 | |||
e9fd94e1a5 | |||
e13a62af84 | |||
d93c3154ac | |||
9809d9a805 | |||
d217c839a9 | |||
aad99f71fe |
@ -1,15 +1,15 @@
|
||||
repos:
|
||||
- repo: https://github.com/psf/black
|
||||
rev: 23.11.0
|
||||
rev: 24.2.0
|
||||
hooks:
|
||||
- id: black
|
||||
|
||||
- repo: https://github.com/PyCQA/isort
|
||||
rev: 5.12.0
|
||||
rev: 5.13.2
|
||||
hooks:
|
||||
- id: isort
|
||||
|
||||
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||
rev: v0.1.4
|
||||
rev: v0.3.0
|
||||
hooks:
|
||||
- id: ruff
|
||||
|
10
TODO
10
TODO
@ -42,11 +42,7 @@ euscan
|
||||
- remote-id type deb repository:
|
||||
-- find out how to get download url (not sure it's possible)
|
||||
|
||||
### remote-id
|
||||
- Propose new remote-id: deb
|
||||
e.g.: <remote-id type="deb">
|
||||
http://mysite.com/deb/dists/stable/main/binary-i386/Packages
|
||||
</remote-id>
|
||||
- Propose new remote-id: freecode
|
||||
e.g.: <remote-id type="freecode">projectname</remote-id>
|
||||
### bugs or unwanted behavior
|
||||
|
||||
- Parsing docs and accepting 404's
|
||||
-- net-analyzer/sensu
|
||||
|
@ -16,7 +16,8 @@ description = "Ebuild upstream scan utility."
|
||||
license = {text = "GPL-2.0"}
|
||||
dependencies = [
|
||||
"portage",
|
||||
"beautifulsoup4>=4.8.2"
|
||||
"beautifulsoup4>=4.8.2",
|
||||
"packaging"
|
||||
]
|
||||
dynamic = ["version"]
|
||||
|
||||
|
@ -51,8 +51,13 @@ BLACKLIST_PACKAGES = [
|
||||
]
|
||||
|
||||
SCANDIR_BLACKLIST_URLS = [
|
||||
"mirror://rubygems/(.*)", # Not browsable
|
||||
"https://rubygems.org/(.*)", # Not browsable
|
||||
"mirror://gentoo/(.*)", # Directory too big
|
||||
"https://dev.gentoo.org/(.*)", # There shouldn't be releases here
|
||||
# Waste of time to go through
|
||||
"https://crates.io/(.*)",
|
||||
"https://api.nuget.org/(.*)",
|
||||
"https://myget.org/(.*)",
|
||||
]
|
||||
|
||||
BRUTEFORCE_BLACKLIST_PACKAGES = [
|
||||
|
@ -1,59 +0,0 @@
|
||||
# Copyright 2011 Corentin Chary <corentin.chary@gmail.com>
|
||||
# Copyright 2020-2023 src_prepare group
|
||||
# Distributed under the terms of the GNU General Public License v2
|
||||
|
||||
import re
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
|
||||
import portage
|
||||
|
||||
from euscan import output
|
||||
from euscan.handlers.url import process_scan as url_scan
|
||||
from euscan.helpers import regex_from_template
|
||||
|
||||
# Handler identity and ranking metadata, read by the euscan handler
# dispatcher (presumably; confirm against euscan core).
HANDLER_NAME = "berlios"
CONFIDENCE = 90
PRIORITY = 90


# Matches mirror://berlios/<project>/<filename> SRC_URIs; group 1 is the
# project name, group 2 the distfile name.
berlios_regex = r"mirror://berlios/([^/]+)/([^/]+)"
|
||||
|
||||
|
||||
def can_handle(pkg, url=None):
    """Return a truthy match if *url* is a BerliOS mirror URL that
    contains this package's version, otherwise a falsy value."""
    if not url:
        return False

    _, version, _ = portage.pkgsplit(pkg.cpv)
    # Only claim the URL when the version literally appears in it and it
    # follows the mirror://berlios/ scheme.
    return version in url and re.search(berlios_regex, url)
|
||||
|
||||
|
||||
def scan_url(pkg, url, options):
    """Scan a BerliOS mirror URL for newer upstream versions.

    Resolves the project's numeric group id from its BerliOS project
    page, then delegates the file-listing scan to the generic URL
    handler.  Returns a list of
    (url, version, handler_name, confidence) tuples.
    """
    output.einfo("Using BerliOS handler")

    cp, ver, rev = portage.pkgsplit(pkg.cpv)

    project, filename = re.search(berlios_regex, url).groups()

    project_page = "http://developer.berlios.de/projects/%s" % project
    # Fix: urlopen().read() returns bytes, and searching bytes with a
    # str pattern raises TypeError — decode the page first.
    content = urllib.request.urlopen(project_page).read().decode(
        "utf-8", errors="replace"
    )

    project_id = re.search(r"/project/filelist.php\?group_id=(\d+)", content).group(1)

    base_url = (
        "http://developer.berlios.de/project/filelist.php?group_id=%s" % project_id
    )

    file_pattern = regex_from_template(filename.replace(ver, "${PV}"))

    result = url_scan(pkg, base_url, file_pattern)

    # BerliOS "prdownload" links are redirect pages; rewrite them to the
    # direct "download" form.
    return [
        (found_url.replace("prdownload", "download"), pv, HANDLER_NAME, CONFIDENCE)
        for found_url, pv, _, _ in result
    ]
|
@ -1,53 +0,0 @@
|
||||
# Copyright 2011 Corentin Chary <corentin.chary@gmail.com>
|
||||
# Copyright 2020-2023 src_prepare group
|
||||
# Distributed under the terms of the GNU General Public License v2
|
||||
|
||||
import re
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
|
||||
import portage
|
||||
|
||||
from euscan import helpers, mangling, output
|
||||
|
||||
# Handler identity and ranking metadata, read by the euscan handler
# dispatcher (presumably; confirm against euscan core).
HANDLER_NAME = "freecode"
CONFIDENCE = 100
PRIORITY = 90
|
||||
|
||||
|
||||
def can_handle(pkg, url=None):
    """The FreeCode handler is never auto-selected from a URL; it is
    only invoked explicitly via package metadata (see scan_pkg)."""
    return False
|
||||
|
||||
|
||||
def scan_pkg(pkg, options):
    """Scan FreeCode (formerly Freshmeat) release listings for new
    upstream versions of *pkg*.

    options["data"] holds the FreeCode project name.  Returns a list of
    (url, version, handler_name, confidence) tuples.
    """
    cp, ver, rev = portage.pkgsplit(pkg.cpv)

    package = options["data"].strip()

    output.einfo("Using FreeCode handler: " + package)

    fp = urllib.request.urlopen("http://freecode.com/projects/%s/releases" % package)
    # Fix: str(bytes) produces a "b'...'" repr with escaped newlines;
    # decode the HTTP response to real text instead.
    content = fp.read().decode("utf-8", errors="replace")

    result = re.findall(
        r'<a href="/projects/%s/releases/(\d+)">([^<]+)</a>' % package, content
    )

    ret = []
    for release_id, up_pv in result:
        pv = mangling.mangle_version(up_pv, options)
        if helpers.version_filtered(cp, ver, pv):
            continue
        # Follow the release page to its download redirect page, then
        # scrape the final download URL out of the redirect notice.
        fp = urllib.request.urlopen(
            f"http://freecode.com/projects/{package}/releases/{release_id}"
        )
        content = fp.read().decode("utf-8", errors="replace")
        download_page = re.findall(r'<a href="(/urls/[^"]+)"', content)[0]
        fp = urllib.request.urlopen("http://freecode.com%s" % download_page)
        content = fp.read().decode("utf-8", errors="replace")
        url = re.findall(
            r'In case it doesn\'t, click here: <a href="([^"]+)"', content
        )[0]
        ret.append((url, pv, HANDLER_NAME, CONFIDENCE))
    return ret
|
70
src/euscan/handlers/gitea.py
Normal file
70
src/euscan/handlers/gitea.py
Normal file
@ -0,0 +1,70 @@
|
||||
# Copyright 2020-2024 src_prepare group
|
||||
# Distributed under the terms of the GNU General Public License v2
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
import portage
|
||||
|
||||
from euscan import helpers, mangling, output
|
||||
|
||||
# Handler identity and ranking metadata, read by the euscan handler
# dispatcher (presumably; confirm against euscan core).
HANDLER_NAME = "gitea"
CONFIDENCE = 100
PRIORITY = 90

# Forgejo strives to be compatible with Gitea API
# https://forgejo.org/2024-02-forking-forward/

# Known public Gitea/Forgejo instances recognized by this handler.
_gitea_instances = [
    "codeberg.org",
    "git.osgeo.org",
    "gitea.com",
    "gitea.ladish.org",
    "gitea.osmocom.org",
    "gitea.treehouse.systems",
]

# One compiled pattern per instance, capturing the instance domain and
# the "owner/name" repository path.
gitea_patterns = [
    re.compile(rf"https://(?P<domain>{domain})/(?P<repository>[^/]+/[^/]+)")
    for domain in _gitea_instances
]
|
||||
|
||||
|
||||
def can_handle(pkg, url=None):
    """Return True if *url* points at a known Gitea/Forgejo instance."""
    # Use the pre-compiled patterns' own .search and a generator so
    # any() can short-circuit instead of building a throwaway list.
    return bool(url) and any(
        pattern.search(url) is not None for pattern in gitea_patterns
    )
|
||||
|
||||
|
||||
def scan_url(pkg, url, options):
    """List upstream releases of the repository behind *url* via the
    Gitea REST API.

    https://docs.gitea.com/api/1.20/#tag/repository/operation/repoListReleases

    Returns a list of (urls, version, handler_name, confidence) tuples.
    """
    # Take the first instance pattern that matches; search each pattern
    # only once (the original searched every pattern twice).
    match = next(
        m for m in (pattern.search(url) for pattern in gitea_patterns) if m is not None
    )

    domain = match.group("domain")
    repository = match.group("repository")

    output.einfo(f"Using Gitea API in {domain}: {repository}")

    request = helpers.urlopen(f"https://{domain}/api/v1/repos/{repository}/releases")

    data = json.load(request)

    versions = [release["tag_name"] for release in data]

    # Group tarball URLs by tag once, instead of rescanning every
    # release for every version (O(n**2) in the original).
    urls_by_tag = {}
    for release in data:
        urls_by_tag.setdefault(release["tag_name"], []).append(
            mangling.mangle_url(release["tarball_url"], options)
        )

    cp, ver, rev = portage.pkgsplit(pkg.cpv)

    ret = []
    for up_pv in versions:
        pv = mangling.mangle_version(up_pv, options)
        if helpers.version_filtered(cp, ver, pv):
            continue
        ret.append((" ".join(urls_by_tag[up_pv]), pv, HANDLER_NAME, CONFIDENCE))
    return ret
|
@ -1,66 +0,0 @@
|
||||
# Copyright 2011 Corentin Chary <corentin.chary@gmail.com>
|
||||
# Copyright 2020-2023 src_prepare group
|
||||
# Distributed under the terms of the GNU General Public License v2
|
||||
|
||||
import json
|
||||
import re
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
|
||||
import portage
|
||||
|
||||
from euscan import helpers, mangling, output
|
||||
|
||||
# Handler identity and ranking metadata, read by the euscan handler
# dispatcher (presumably; confirm against euscan core).
HANDLER_NAME = "github"
CONFIDENCE = 100
PRIORITY = 90
|
||||
|
||||
|
||||
def can_handle(pkg, url=None):
    """Handle only SRC_URIs using the Gentoo github mirror scheme."""
    if url:
        return url.startswith("mirror://github/")
    # Preserve the original truthiness contract: a falsy url is
    # returned as-is (None or "").
    return url
|
||||
|
||||
|
||||
def guess_package(cp, url):
    """Split a mirror://github/ URL into (user, project, filename)."""
    match = re.search(r"^mirror://github/(.*?)/(.*?)/(.*)$", url)

    assert match
    user, project, filename = match.groups()
    return (user, project, filename)
|
||||
|
||||
|
||||
def scan_url(pkg, url, options):
    """Scan GitHub's (legacy) downloads API for newer versions.

    http://developer.github.com/v3/repos/downloads/

    Returns a list of (url, version, handler_name, confidence) tuples,
    or None when the version cannot be located in the filename.
    """
    user, project, filename = guess_package(pkg.cpv, url)

    # find out where version is expected to be found
    cp, ver, rev = portage.pkgsplit(pkg.cpv)
    if ver not in filename:
        return

    # now create a filename-matching regexp
    # XXX: supposedly replace first with (?P<foo>...)
    # and remaining ones with (?P=foo)
    fnre = re.compile("^%s$" % re.escape(filename).replace(re.escape(ver), "(.*?)"))

    # Fix: report the actual filename instead of the literal "(unknown)".
    output.einfo(
        f"Using github API for: project={project} user={user} filename={filename}"
    )

    dlreq = urllib.request.urlopen(
        f"https://api.github.com/repos/{user}/{project}/downloads"
    )
    dls = json.load(dlreq)

    ret = []
    for dl in dls:
        m = fnre.match(dl["name"])

        if m:
            pv = mangling.mangle_version(m.group(1), options)
            if helpers.version_filtered(cp, ver, pv):
                continue

            dl_url = mangling.mangle_url(dl["html_url"], options)
            ret.append((dl_url, pv, HANDLER_NAME, CONFIDENCE))
    return ret
|
82
src/euscan/handlers/gitlab.py
Normal file
82
src/euscan/handlers/gitlab.py
Normal file
@ -0,0 +1,82 @@
|
||||
# Copyright 2020-2024 src_prepare group
|
||||
# Distributed under the terms of the GNU General Public License v2
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
import portage
|
||||
|
||||
from euscan import helpers, mangling, output
|
||||
|
||||
# Handler identity and ranking metadata, read by the euscan handler
# dispatcher (presumably; confirm against euscan core).
HANDLER_NAME = "gitlab"
CONFIDENCE = 100
PRIORITY = 90

# Known public GitLab instances recognized by this handler.
_gitlab_instances = [
    "gitlab.com",
    "gitlab.freedesktop.org",
    "invent.kde.org/",
    "gitlab.gnome.org",
    "gitlab.kitware.com",
    "gitlab.xfce.org",
    "code.videolan.org",
    "gitlab.xiph.org",
]

# One compiled pattern per instance, capturing the instance domain and
# the repository path while excluding GitLab's reserved path segments.
gitlab_patterns = [
    # Regular expression adapted from pkgcheck
    # https://docs.gitlab.com/ee/user/reserved_names.html
    re.compile(
        rf"https://(?P<domain>{domain})/(?P<repository>((?!api/)\w[^/]*/)+(?!raw/)\w[^/]*)"
    )
    for domain in _gitlab_instances
]
|
||||
|
||||
|
||||
def can_handle(pkg, url=None):
    """Return True if *url* points at a known GitLab instance."""
    # Use the pre-compiled patterns' own .search and a generator so
    # any() can short-circuit instead of building a throwaway list.
    return bool(url) and any(
        pattern.search(url) is not None for pattern in gitlab_patterns
    )
|
||||
|
||||
|
||||
def scan_url(pkg, url, options):
    """List upstream releases of the repository behind *url* via the
    GitLab REST API.

    https://docs.gitlab.com/ee/api/releases/index.html

    Returns a list of (urls, version, handler_name, confidence) tuples.
    """
    # Take the first instance pattern that matches; search each pattern
    # only once (the original searched every pattern twice).
    match = next(
        m for m in (pattern.search(url) for pattern in gitlab_patterns) if m is not None
    )

    domain = match.group("domain")
    repository = match.group("repository")

    output.einfo(f"Using GitLab REST API in {domain}: {repository}")

    request = helpers.urlopen(
        f"https://{domain}/api/v4/projects/{repository.replace('/', '%2F')}/releases"
    )

    data = json.load(request)

    versions = [release["tag_name"] for release in data]

    # Map each tag to its tar.bz2 source URLs up front instead of
    # rescanning every release for every version (O(n**2) before).
    sources_by_tag = {}
    for release in data:
        if release["tag_name"] not in sources_by_tag:
            sources_by_tag[release["tag_name"]] = [
                mangling.mangle_url(source["url"], options)
                for source in release["assets"]["sources"]
                # prefer tar.bz2
                if source["format"] == "tar.bz2"
            ]

    cp, ver, rev = portage.pkgsplit(pkg.cpv)

    ret = []
    for up_pv in versions:
        pv = mangling.mangle_version(up_pv, options)
        if helpers.version_filtered(cp, ver, pv):
            continue
        ret.append((" ".join(sources_by_tag[up_pv]), pv, HANDLER_NAME, CONFIDENCE))
    return ret
|
@ -20,7 +20,7 @@ HANDLER_NAME = "gnome"
|
||||
CONFIDENCE = 100
|
||||
PRIORITY = 90
|
||||
|
||||
GNOME_URL_SOURCE = "http://ftp.gnome.org/pub/GNOME/sources"
|
||||
GNOME_URL_SOURCE = "https://download.gnome.org/sources"
|
||||
|
||||
|
||||
def can_handle(_pkg, url=None):
|
||||
@ -38,7 +38,7 @@ def guess_package(cp, url):
|
||||
|
||||
|
||||
def scan_url(pkg, url, options):
|
||||
"http://ftp.gnome.org/pub/GNOME/sources/"
|
||||
"https://download.gnome.org/sources/"
|
||||
package = {
|
||||
"data": guess_package(pkg.cpv, url),
|
||||
"type": "gnome",
|
||||
@ -55,7 +55,7 @@ def scan_pkg(pkg, options):
|
||||
content = fp.read()
|
||||
fp.close()
|
||||
|
||||
cache = json.loads(content, encoding="ascii")
|
||||
cache = json.loads(content)
|
||||
|
||||
if cache[0] != 4:
|
||||
output.eerror("Unknow cache format detected")
|
||||
|
@ -1,47 +0,0 @@
|
||||
# Copyright 2011 Corentin Chary <corentin.chary@gmail.com>
|
||||
# Copyright 2020-2023 src_prepare group
|
||||
# Distributed under the terms of the GNU General Public License v2
|
||||
|
||||
import re
|
||||
|
||||
import portage
|
||||
|
||||
from euscan import output
|
||||
from euscan.handlers.url import process_scan as url_scan
|
||||
from euscan.helpers import regex_from_template
|
||||
|
||||
# Handler identity and ranking metadata, read by the euscan handler
# dispatcher (presumably; confirm against euscan core).
HANDLER_NAME = "google-code"
CONFIDENCE = 90
PRIORITY = 90


# Matches a Google Code distfile URL; group 1 is the project name.
package_name_regex = r"http://(.+).googlecode.com/files/.+"
|
||||
|
||||
|
||||
def can_handle(pkg, url=None):
    """Return a truthy match if *url* is a Google Code download URL that
    contains this package's version, otherwise a falsy value."""
    if not url:
        return False

    _, version, _ = portage.pkgsplit(pkg.cpv)
    # Only claim the URL when the version literally appears in it and it
    # follows the googlecode.com/files/ scheme.
    return version in url and re.match(package_name_regex, url)
|
||||
|
||||
|
||||
def scan_url(pkg, url, options):
    """Scan a Google Code project's download list for newer versions.

    Delegates the listing scan to the generic URL handler and re-tags
    the results with this handler's name/confidence.  Returns a list of
    (url, version, handler_name, confidence) tuples.
    """
    output.einfo("Using Google Code handler")

    cp, ver, rev = portage.pkgsplit(pkg.cpv)

    package_name = re.match(package_name_regex, url).group(1)
    base_url = "http://code.google.com/p/%s/downloads/list" % package_name

    file_pattern = regex_from_template(url.split("/")[-1].replace(ver, "${PV}"))

    result = url_scan(pkg, base_url, file_pattern)

    # Fix: the original loop rebound the `url` parameter while
    # iterating; use a comprehension with a fresh name instead.
    return [
        (found_url, pv, HANDLER_NAME, CONFIDENCE) for found_url, pv, _, _ in result
    ]
|
@ -1,11 +1,13 @@
|
||||
# Copyright 2011 Corentin Chary <corentin.chary@gmail.com>
|
||||
# Copyright 2020-2023 src_prepare group
|
||||
# Copyright 2020-2024 src_prepare group
|
||||
# Distributed under the terms of the GNU General Public License v2
|
||||
|
||||
import json
|
||||
import re
|
||||
import xmlrpc.client
|
||||
import urllib.error
|
||||
|
||||
import portage
|
||||
from packaging.version import parse
|
||||
|
||||
from euscan import helpers, mangling, output
|
||||
|
||||
@ -15,11 +17,11 @@ PRIORITY = 90
|
||||
|
||||
|
||||
def can_handle(pkg, url=None):
|
||||
return url and url.startswith("mirror://pypi/")
|
||||
return url and url.startswith("https://files.pythonhosted.org/packages/source/p/")
|
||||
|
||||
|
||||
def guess_package(cp, url):
|
||||
match = re.search(r"mirror://pypi/\w+/(.*)/.*", url)
|
||||
match = re.search(r"https://files.pythonhosted.org/packages/source/p/(.*)/.*", url)
|
||||
if match:
|
||||
return match.group(1)
|
||||
|
||||
@ -29,7 +31,7 @@ def guess_package(cp, url):
|
||||
|
||||
|
||||
def scan_url(pkg, url, options):
|
||||
"http://wiki.python.org/moin/PyPiXmlRpc"
|
||||
"https://peps.python.org/pep-0691/"
|
||||
|
||||
package = guess_package(pkg.cpv, url)
|
||||
return scan_pkg(pkg, {"data": package})
|
||||
@ -38,15 +40,23 @@ def scan_url(pkg, url, options):
|
||||
def scan_pkg(pkg, options):
|
||||
package = options["data"]
|
||||
|
||||
output.einfo("Using PyPi XMLRPC: " + package)
|
||||
output.einfo("Using PyPi JSON API: " + package)
|
||||
|
||||
client = xmlrpc.client.ServerProxy("https://pypi.python.org/pypi")
|
||||
versions = client.package_releases(package)
|
||||
try:
|
||||
fp = helpers.urlopen(f"https://pypi.org/pypi/{package}/json/")
|
||||
except urllib.error.URLError:
|
||||
return []
|
||||
except OSError:
|
||||
return []
|
||||
|
||||
if not versions:
|
||||
return versions
|
||||
if not fp:
|
||||
return []
|
||||
|
||||
versions.reverse()
|
||||
data = json.loads(fp.read())
|
||||
|
||||
versions = list(data["releases"].keys())
|
||||
|
||||
versions.sort(key=parse, reverse=True)
|
||||
|
||||
cp, ver, rev = portage.pkgsplit(pkg.cpv)
|
||||
|
||||
@ -55,7 +65,12 @@ def scan_pkg(pkg, options):
|
||||
pv = mangling.mangle_version(up_pv, options)
|
||||
if helpers.version_filtered(cp, ver, pv):
|
||||
continue
|
||||
urls = client.release_urls(package, up_pv)
|
||||
urls = " ".join([mangling.mangle_url(infos["url"], options) for infos in urls])
|
||||
urls = " ".join(
|
||||
[
|
||||
mangling.mangle_url(file["url"], options)
|
||||
for file in data["releases"][up_pv]
|
||||
if file["packagetype"] == "sdist"
|
||||
]
|
||||
)
|
||||
ret.append((urls, pv, HANDLER_NAME, CONFIDENCE))
|
||||
return ret
|
||||
|
@ -1,5 +1,5 @@
|
||||
# Copyright 2011 Corentin Chary <corentin.chary@gmail.com>
|
||||
# Copyright 2020-2023 src_prepare group
|
||||
# Copyright 2020-2024 src_prepare group
|
||||
# Distributed under the terms of the GNU General Public License v2
|
||||
|
||||
import json
|
||||
@ -18,11 +18,11 @@ PRIORITY = 90
|
||||
|
||||
|
||||
def can_handle(pkg, url=None):
|
||||
return url and url.startswith("mirror://rubygems/")
|
||||
return url and url.startswith("https://rubygems.org/")
|
||||
|
||||
|
||||
def guess_gem(cpv, url):
|
||||
match = re.search("mirror://rubygems/(.*).gem", url)
|
||||
match = re.search("https://rubygems.org/gems/(.*).gem", url)
|
||||
if match:
|
||||
cpv = "fake/%s" % match.group(1)
|
||||
|
||||
|
@ -153,6 +153,9 @@ def scan_upstream(query, on_progress=None):
|
||||
else:
|
||||
uris = pkg.environment("SRC_URI")
|
||||
|
||||
# Roundabout way to handle $'' strings
|
||||
uris = uris.encode("raw_unicode_escape").decode("unicode_escape")
|
||||
|
||||
cpv = pkg.cpv
|
||||
|
||||
uris = parse_src_uri(uris)
|
||||
|
Reference in New Issue
Block a user