Compare commits

...

7 Commits

Author SHA1 Message Date
Alfred Wingate b2cd013b09
Work around hard-to-parse $'' strings
Signed-off-by: Alfred Wingate <parona@protonmail.com>
2024-03-03 00:56:41 +02:00
Alfred Wingate e9fd94e1a5
Blacklist urls that don't make sense to scan
Signed-off-by: Alfred Wingate <parona@protonmail.com>
2024-03-03 00:56:41 +02:00
Alfred Wingate e13a62af84
Remove encoding keyword from json()
* Removed in Python 3.9

Signed-off-by: Alfred Wingate <parona@protonmail.com>
2024-03-03 00:56:41 +02:00
Alfred Wingate d93c3154ac
Update GNOME_URL_SOURCE
* It gets redirected either way.

Signed-off-by: Alfred Wingate <parona@protonmail.com>
2024-03-03 00:56:41 +02:00
Alfred Wingate 9809d9a805
Add Gitea (+ Forgejo) handler
Signed-off-by: Alfred Wingate <parona@protonmail.com>
2024-03-03 00:56:36 +02:00
Alfred Wingate d217c839a9
Add GitLab handler
Signed-off-by: Alfred Wingate <parona@protonmail.com>
2024-03-02 22:13:44 +02:00
Alfred Wingate aad99f71fe
Use JSON api for PyPi
* "The XML-RPC API will be deprecated in the future. Use of this API is
  not recommended, and existing consumers of the API should migrate to
  the RSS and/or JSON APIs instead."
* "As a result, this API has a very restrictive rate limit and it may be
  necessary to pause between successive requests." As such this also
  gets around this issue for euscan.

https://warehouse.pypa.io/api-reference/xml-rpc.html

Signed-off-by: Alfred Wingate <parona@protonmail.com>
2024-03-02 16:18:34 +02:00
7 changed files with 191 additions and 15 deletions

View File

@ -16,7 +16,8 @@ description = "Ebuild upstream scan utility."
license = {text = "GPL-2.0"}
dependencies = [
"portage",
"beautifulsoup4>=4.8.2"
"beautifulsoup4>=4.8.2",
"packaging"
]
dynamic = ["version"]

View File

@ -53,6 +53,11 @@ BLACKLIST_PACKAGES = [
SCANDIR_BLACKLIST_URLS = [
"mirror://rubygems/(.*)", # Not browsable
"mirror://gentoo/(.*)", # Directory too big
"https://dev.gentoo.org/(.*)", # There shouldn't be releases here
# Waste of time to go through
"https://crates.io/(.*)",
"https://api.nuget.org/(.*)",
"https://myget.org/(.*)",
]
BRUTEFORCE_BLACKLIST_PACKAGES = [

View File

@ -0,0 +1,70 @@
# Copyright 2020-2024 src_prepare group
# Distributed under the terms of the GNU General Public License v2
import json
import re
import portage
from euscan import helpers, mangling, output
HANDLER_NAME = "gitea"
CONFIDENCE = 100
PRIORITY = 90

# Hosts queried through the Gitea REST API.
# Forgejo strives to be compatible with Gitea API
# https://forgejo.org/2024-02-forking-forward/
_gitea_instances = [
    "codeberg.org",
    "git.osgeo.org",
    "gitea.com",
    "gitea.ladish.org",
    "gitea.osmocom.org",
    "gitea.treehouse.systems",
]

# One compiled pattern per instance, capturing the host as "domain" and the
# "<owner>/<name>" path as "repository".  The host is escaped so that its
# dots match literally instead of acting as regex wildcards.
gitea_patterns = [
    re.compile(
        rf"https://(?P<domain>{re.escape(domain)})/(?P<repository>[^/]+/[^/]+)"
    )
    for domain in _gitea_instances
]
def can_handle(pkg, url=None):
    """Return True when *url* matches one of the known Gitea instances.

    ``pkg`` is not used by this handler.  A missing or empty *url* yields
    ``False`` rather than echoing the falsy value back to the caller.
    """
    if not url:
        return False
    # Generator form lets any() stop at the first matching instance.
    return any(pattern.search(url) for pattern in gitea_patterns)
def scan_url(pkg, url, options):
    """List the releases of the Gitea repository that *url* points into.

    https://docs.gitea.com/api/1.20/#tag/repository/operation/repoListReleases

    Returns a list of ``(url, version, HANDLER_NAME, CONFIDENCE)`` tuples, one
    per release whose mangled tag name is not rejected by
    ``helpers.version_filtered``.  An empty list is returned when *url*
    matches none of the known instances or the request yields no response.
    """
    # Search each pattern once and keep the first hit.
    match = next(
        (m for m in (pattern.search(url) for pattern in gitea_patterns) if m),
        None,
    )
    if match is None:
        return []

    domain = match.group("domain")
    repository = match.group("repository")

    output.einfo(f"Using Gitea API in {domain}: {repository}")

    request = helpers.urlopen(f"https://{domain}/api/v1/repos/{repository}/releases")
    # NOTE(review): assumes helpers.urlopen may hand back a falsy value on
    # failure -- confirm against its implementation.
    if not request:
        return []

    data = json.load(request)

    cp, ver, _rev = portage.pkgsplit(pkg.cpv)

    ret = []
    # Walk the releases directly instead of rescanning `data` for every tag.
    for release in data:
        up_pv = release["tag_name"]
        pv = mangling.mangle_version(up_pv, options)
        if helpers.version_filtered(cp, ver, pv):
            continue
        urls = mangling.mangle_url(release["tarball_url"], options)
        ret.append((urls, pv, HANDLER_NAME, CONFIDENCE))
    return ret

View File

@ -0,0 +1,82 @@
# Copyright 2020-2024 src_prepare group
# Distributed under the terms of the GNU General Public License v2
import json
import re
import portage
from euscan import helpers, mangling, output
HANDLER_NAME = "gitlab"
CONFIDENCE = 100
PRIORITY = 90

# Hosts queried through the GitLab REST API.  Entries are bare host names:
# a trailing slash would make the pattern below require "//" after the host.
_gitlab_instances = [
    "gitlab.com",
    "gitlab.freedesktop.org",
    "invent.kde.org",
    "gitlab.gnome.org",
    "gitlab.kitware.com",
    "gitlab.xfce.org",
    "code.videolan.org",
    "gitlab.xiph.org",
]

# One compiled pattern per instance, capturing the host as "domain" and the
# (possibly nested) project path as "repository".  The host is escaped so
# that its dots match literally instead of acting as regex wildcards.
gitlab_patterns = [
    # Regular expression adapted from pkgcheck
    # https://docs.gitlab.com/ee/user/reserved_names.html
    re.compile(
        rf"https://(?P<domain>{re.escape(domain)})/"
        r"(?P<repository>((?!api/)\w[^/]*/)+(?!raw/)\w[^/]*)"
    )
    for domain in _gitlab_instances
]
def can_handle(pkg, url=None):
    """Return True when *url* matches one of the known GitLab instances.

    ``pkg`` is not used by this handler.  A missing or empty *url* yields
    ``False`` rather than echoing the falsy value back to the caller.
    """
    if not url:
        return False
    # Generator form lets any() stop at the first matching instance.
    return any(pattern.search(url) for pattern in gitlab_patterns)
def scan_url(pkg, url, options):
    """List the releases of the GitLab project that *url* points into.

    https://docs.gitlab.com/ee/api/releases/index.html

    Returns a list of ``(urls, version, HANDLER_NAME, CONFIDENCE)`` tuples,
    one per release whose mangled tag name is not rejected by
    ``helpers.version_filtered``.  ``urls`` is a space-separated string of
    source archive URLs.  An empty list is returned when *url* matches none
    of the known instances or the request yields no response.
    """
    # Search each pattern once and keep the first hit.
    match = next(
        (m for m in (pattern.search(url) for pattern in gitlab_patterns) if m),
        None,
    )
    if match is None:
        return []

    domain = match.group("domain")
    repository = match.group("repository")

    output.einfo(f"Using GitLab REST API in {domain}: {repository}")

    # The project path is passed as a single URL-encoded path segment.
    request = helpers.urlopen(
        f"https://{domain}/api/v4/projects/{repository.replace('/', '%2F')}/releases"
    )
    # NOTE(review): assumes helpers.urlopen may hand back a falsy value on
    # failure -- confirm against its implementation.
    if not request:
        return []

    data = json.load(request)

    cp, ver, _rev = portage.pkgsplit(pkg.cpv)

    ret = []
    # Walk the releases directly instead of rescanning `data` for every tag.
    for release in data:
        up_pv = release["tag_name"]
        pv = mangling.mangle_version(up_pv, options)
        if helpers.version_filtered(cp, ver, pv):
            continue
        sources = release["assets"]["sources"]
        # prefer tar.bz2, but fall back to every listed source archive so a
        # release without a tar.bz2 does not end up with an empty URL string
        chosen = [src for src in sources if src["format"] == "tar.bz2"] or sources
        urls = " ".join(mangling.mangle_url(src["url"], options) for src in chosen)
        ret.append((urls, pv, HANDLER_NAME, CONFIDENCE))
    return ret

View File

@ -20,7 +20,7 @@ HANDLER_NAME = "gnome"
CONFIDENCE = 100
PRIORITY = 90
GNOME_URL_SOURCE = "http://ftp.gnome.org/pub/GNOME/sources"
GNOME_URL_SOURCE = "https://download.gnome.org/sources"
def can_handle(_pkg, url=None):
@ -38,7 +38,7 @@ def guess_package(cp, url):
def scan_url(pkg, url, options):
"http://ftp.gnome.org/pub/GNOME/sources/"
"https://download.gnome.org/sources/"
package = {
"data": guess_package(pkg.cpv, url),
"type": "gnome",
@ -55,7 +55,7 @@ def scan_pkg(pkg, options):
content = fp.read()
fp.close()
cache = json.loads(content, encoding="ascii")
cache = json.loads(content)
if cache[0] != 4:
output.eerror("Unknow cache format detected")

View File

@ -1,11 +1,13 @@
# Copyright 2011 Corentin Chary <corentin.chary@gmail.com>
# Copyright 2020-2023 src_prepare group
# Copyright 2020-2024 src_prepare group
# Distributed under the terms of the GNU General Public License v2
import json
import re
import xmlrpc.client
import urllib.error
import portage
from packaging.version import parse
from euscan import helpers, mangling, output
@ -29,7 +31,7 @@ def guess_package(cp, url):
def scan_url(pkg, url, options):
"http://wiki.python.org/moin/PyPiXmlRpc"
"https://peps.python.org/pep-0691/"
package = guess_package(pkg.cpv, url)
return scan_pkg(pkg, {"data": package})
@ -38,15 +40,23 @@ def scan_url(pkg, url, options):
def scan_pkg(pkg, options):
package = options["data"]
output.einfo("Using PyPi XMLRPC: " + package)
output.einfo("Using PyPi JSON API: " + package)
client = xmlrpc.client.ServerProxy("https://pypi.python.org/pypi")
versions = client.package_releases(package)
try:
fp = helpers.urlopen(f"https://pypi.org/pypi/{package}/json/")
except urllib.error.URLError:
return []
except OSError:
return []
if not versions:
return versions
if not fp:
return []
versions.reverse()
data = json.loads(fp.read())
versions = list(data["releases"].keys())
versions.sort(key=parse, reverse=True)
cp, ver, rev = portage.pkgsplit(pkg.cpv)
@ -55,7 +65,12 @@ def scan_pkg(pkg, options):
pv = mangling.mangle_version(up_pv, options)
if helpers.version_filtered(cp, ver, pv):
continue
urls = client.release_urls(package, up_pv)
urls = " ".join([mangling.mangle_url(infos["url"], options) for infos in urls])
urls = " ".join(
[
mangling.mangle_url(file["url"], options)
for file in data["releases"][up_pv]
if file["packagetype"] == "sdist"
]
)
ret.append((urls, pv, HANDLER_NAME, CONFIDENCE))
return ret

View File

@ -153,6 +153,9 @@ def scan_upstream(query, on_progress=None):
else:
uris = pkg.environment("SRC_URI")
# Roundabout way to handle $'' strings
uris = uris.encode("raw_unicode_escape").decode("unicode_escape")
cpv = pkg.cpv
uris = parse_src_uri(uris)