summary refs log tree commit diff
path: root/dev-python/nltk
diff options
context:
space:
mode:
Diffstat (limited to 'dev-python/nltk')
-rw-r--r-- dev-python/nltk/Manifest 6
-rw-r--r-- dev-python/nltk/files/python-3.12.patch 282
-rw-r--r-- dev-python/nltk/metadata.xml 14
-rw-r--r-- dev-python/nltk/nltk-3.7.ebuild 62
-rw-r--r-- dev-python/nltk/nltk-3.8.1.ebuild 49
5 files changed, 413 insertions, 0 deletions
diff --git a/dev-python/nltk/Manifest b/dev-python/nltk/Manifest
new file mode 100644
index 0000000..b3360f6
--- /dev/null
+++ b/dev-python/nltk/Manifest
@@ -0,0 +1,6 @@
+AUX python-3.12.patch 8925 BLAKE2B a9ad62b0b8d37b8707c6fa880af98ce6ee8e3f02a1d6cf42456a6d1dc2398f7cbf8721eb9e580594531c963dc10232f2e259939ed4dd78b5f6b9d778e2c2c7f7 SHA512 b32631b7d0b805496188b30e4ea8bcb3bd6cccee98b2f3cf5ccffcd8467c6e333654d23ab71b78ca1030aff54ac2e2137efff076bf86bd991bfd0a3077a6543f
+DIST nltk-3.7.gh.tar.gz 2851947 BLAKE2B 4cd4fed9f26de7996bcabbd71d500c072489e79ae0edb29f09577e465227c948efa7130af6975b029e77fb79db015437a9e3311eb28048d02dcc2356441d0908 SHA512 66d79afa59f18f2bf7d086dcb22813c7b92239c05669daa62866dbda06c0ca22367317749cf19db3e41a7bdb405d4f87f72b695aa58fd0669bf36de64f1f9888
+DIST nltk-3.8.1.gh.tar.gz 2867926 BLAKE2B 54de1d3ae7cfe0b71ac0f5a62ed3e81e489fa4d322cd32eb4aa4ef60617eb96767a0f3a92c7e5a16e71836cb0d3a1076ae3598b6bcf0af5925c95e1a0ccf70f8 SHA512 5b13eff5d8e628173f5321c293d896919d369bcd586861a7e09bf6fdca2b09f2580902da98ec647bcf9cdc2b33a87c830dda3793de20c31c8bc38c86df8f7930
+EBUILD nltk-3.7.ebuild 1747 BLAKE2B ba7e28721bb9ed45e93552b6def4299f6bd139f197ff849cda59083f3ff51cf7fe035e10d4fc643d74e00a9a76951930475a91bae1e305afce90768bc4e4b4d4 SHA512 790ef13f1b57e887de26b745a8c03479ae8e38117380607936848f4a72eb113b456024fc2e47905fbc767b301cc5a36895520524a62d1ec2af6134dfcfedac26
+EBUILD nltk-3.8.1.ebuild 1394 BLAKE2B bf5cdfbd9eb5c1b415de3039f5af1be36c34e8a3c0245efc908c941028b34119d59486dc0c8649771e386808d0c10ab1ab52d1bc651fd8157cd9b9fcc77bfb8e SHA512 3f4c996aab04efd784d948199a7b96bc58372e453216ae290a7c2f903f67f6518cd39162f55900fb1e047badab3d3d6a1fd53c330901f8ccb3acbd0a7023bd78
+MISC metadata.xml 445 BLAKE2B 4bfcde721368b9c9354799863a7a78d2c04eb789672f52b5449bda3ef567448fa1b434dda9d150aa0ff360477c576ea2fd0bebce40bf80bb843393e245f85278 SHA512 89bb6d0ca2e268e3540cf36e91b57e834eb31f078a91bb7ed7218a17c1a9b63809ae77916822eabf6a3a86286d00ae4a311b33c52e40d9d2e61b6d242f4cef78
diff --git a/dev-python/nltk/files/python-3.12.patch b/dev-python/nltk/files/python-3.12.patch
new file mode 100644
index 0000000..c55d285
--- /dev/null
+++ b/dev-python/nltk/files/python-3.12.patch
@@ -0,0 +1,282 @@
+commit 544bf807bf506927872b7b837c6e1514e80419ab
+Author: Konstantin Chernyshev <k4black@ya.ru>
+Date: Thu Nov 16 19:40:54 2023 +0100
+
+ fix(bleu): add Fraction with 3.12 support
+
+diff --git a/nltk/test/unit/translate/test_bleu.py b/nltk/test/unit/translate/test_bleu.py
+index 990b76406..b472d785f 100644
+--- a/nltk/test/unit/translate/test_bleu.py
++++ b/nltk/test/unit/translate/test_bleu.py
+@@ -2,7 +2,6 @@
+ Tests for BLEU translation evaluation metric
+ """
+
+-import io
+ import unittest
+
+ import numpy as np
+diff --git a/nltk/translate/bleu_score.py b/nltk/translate/bleu_score.py
+index da445bc3e..9647202e6 100644
+--- a/nltk/translate/bleu_score.py
++++ b/nltk/translate/bleu_score.py
+@@ -7,16 +7,40 @@
+ # For license information, see LICENSE.TXT
+
+ """BLEU score implementation."""
++from __future__ import annotations
+
+ import math
+ import sys
+ import warnings
+ from collections import Counter
+-from fractions import Fraction
++from dataclasses import dataclass
+
+ from nltk.util import ngrams
+
+
++@dataclass
++class Fraction:
++ """
++ This class is used to represent a fraction with both the numerator and denominator saved for later retrieval.
++ Python 3.12 removed _normalize=False from the standard lib Fraction constructor.
++ """
++
++ numerator: int | float
++ denominator: int = 1
++
++ def __float__(self):
++ return self.numerator / self.denominator
++
++ def __lt__(self, other):
++ return float(self) < float(other)
++
++ def __eq__(self, other):
++ return self.numerator == other.numerator and self.denominator == other.denominator
++
++ def __gt__(self, other):
++ return float(self) > float(other)
++
++
+ def sentence_bleu(
+ references,
+ hypothesis,
+@@ -222,7 +246,7 @@ def corpus_bleu(
+
+ # Collects the various precision values for the different ngram orders.
+ p_n = [
+- Fraction(p_numerators[i], p_denominators[i], _normalize=False)
++ Fraction(p_numerators[i], p_denominators[i])
+ for i in range(1, max_weight_length + 1)
+ ]
+
+@@ -365,7 +389,7 @@ def modified_precision(references, hypothesis, n):
+ # Usually this happens when the ngram order is > len(reference).
+ denominator = max(1, sum(counts.values()))
+
+- return Fraction(numerator, denominator, _normalize=False)
++ return Fraction(numerator, denominator)
+
+
+ def closest_ref_length(references, hyp_len):
+@@ -577,7 +601,7 @@ class SmoothingFunction:
+ In COLING 2004.
+ """
+ return [
+- Fraction(p_n[i].numerator + 1, p_n[i].denominator + 1, _normalize=False)
++ Fraction(p_n[i].numerator + 1, p_n[i].denominator + 1)
+ if i != 0
+ else p_n[0]
+ for i in range(len(p_n))
+
+commit eab59d7ceac5dc64e34ca04ee0143fa8998af204
+Author: Konstantin Chernyshev <k4black@ya.ru>
+Date: Thu Nov 16 19:46:47 2023 +0100
+
+ style: fix pre-commit style checks
+
+diff --git a/nltk/translate/bleu_score.py b/nltk/translate/bleu_score.py
+index 9647202e6..aab519521 100644
+--- a/nltk/translate/bleu_score.py
++++ b/nltk/translate/bleu_score.py
+@@ -35,7 +35,9 @@ class Fraction:
+ return float(self) < float(other)
+
+ def __eq__(self, other):
+- return self.numerator == other.numerator and self.denominator == other.denominator
++ return (
++ self.numerator == other.numerator and self.denominator == other.denominator
++ )
+
+ def __gt__(self, other):
+ return float(self) > float(other)
+@@ -601,10 +603,7 @@ class SmoothingFunction:
+ In COLING 2004.
+ """
+ return [
+- Fraction(p_n[i].numerator + 1, p_n[i].denominator + 1)
+- if i != 0
+- else p_n[0]
+- for i in range(len(p_n))
++ Fraction(p_n[i].numerator + 1, p_n[i].denominator + 1) if i != 0 else p_n[0] for i in range(len(p_n))
+ ]
+
+ def method3(self, p_n, *args, **kwargs):
+
+commit 8f06fa4b1da4a390d53010e56bc6ab5d5cb32a3a
+Author: Konstantin Chernyshev <k4black@ya.ru>
+Date: Thu Nov 16 19:48:06 2023 +0100
+
+ style: fix pre-commit style checks
+
+diff --git a/nltk/translate/bleu_score.py b/nltk/translate/bleu_score.py
+index aab519521..f32743ecc 100644
+--- a/nltk/translate/bleu_score.py
++++ b/nltk/translate/bleu_score.py
+@@ -603,7 +603,8 @@ class SmoothingFunction:
+ In COLING 2004.
+ """
+ return [
+- Fraction(p_n[i].numerator + 1, p_n[i].denominator + 1) if i != 0 else p_n[0] for i in range(len(p_n))
++ Fraction(p_n[i].numerator + 1, p_n[i].denominator + 1) if i != 0 else p_n[0]
++ for i in range(len(p_n))
+ ]
+
+ def method3(self, p_n, *args, **kwargs):
+
+commit b72a4bdbaf99a2bdb6a99454a1eb8804051bf644
+Author: Konstantin Chernyshev <k4black@ya.ru>
+Date: Thu Nov 16 21:49:32 2023 +0100
+
+ fix: Fraction
+
+diff --git a/nltk/translate/bleu_score.py b/nltk/translate/bleu_score.py
+index f32743ecc..72f5b548d 100644
+--- a/nltk/translate/bleu_score.py
++++ b/nltk/translate/bleu_score.py
+@@ -42,6 +42,14 @@ class Fraction:
+ def __gt__(self, other):
+ return float(self) > float(other)
+
++ def __add__(self, other):
++ if isinstance(other, int):
++ other = Fraction(other, 1)
++ return Fraction(
++ self.numerator * other.denominator + other.numerator * self.denominator,
++ self.denominator * other.denominator,
++ )
++
+
+ def sentence_bleu(
+ references,
+
+commit 86fa0832f0f4b366f96867f59ae05d744d68b513
+Author: Konstantin Chernyshev <k4black@ya.ru>
+Date: Thu Nov 16 23:26:42 2023 +0100
+
+ fix: use Fraction override
+
+diff --git a/nltk/translate/bleu_score.py b/nltk/translate/bleu_score.py
+index 72f5b548d..ddb54dba8 100644
+--- a/nltk/translate/bleu_score.py
++++ b/nltk/translate/bleu_score.py
+@@ -7,48 +7,39 @@
+ # For license information, see LICENSE.TXT
+
+ """BLEU score implementation."""
+-from __future__ import annotations
+-
+ import math
+ import sys
+ import warnings
+ from collections import Counter
+-from dataclasses import dataclass
++from fractions import Fraction as _Fraction
+
+ from nltk.util import ngrams
+
+
+-@dataclass
+-class Fraction:
+- """
+- This class is used to represent a fraction with both the numerator and denominator saved for later retrieval.
+- Python 3.12 removed _normalize=False from the standard lib Fraction constructor.
+- """
+-
+- numerator: int | float
+- denominator: int = 1
+-
+- def __float__(self):
+- return self.numerator / self.denominator
+-
+- def __lt__(self, other):
+- return float(self) < float(other)
++class Fraction(_Fraction):
++ """Fraction with _normalize=False support for 3.12"""
+
+- def __eq__(self, other):
+- return (
+- self.numerator == other.numerator and self.denominator == other.denominator
+- )
++ def __new__(cls, numerator=0, denominator=None, _normalize=False):
++ if sys.version_info >= (3, 12):
++ self = super().__new__(cls, numerator, denominator)
++ else:
++ self = super().__new__(cls, numerator, denominator, _normalize=_normalize)
++ self._normalize = _normalize
++ self._original_numerator = numerator
++ self._original_denominator = denominator
++ return self
+
+- def __gt__(self, other):
+- return float(self) > float(other)
++ @property
++ def numerator(self):
++ if self._normalize:
++ return self._numerator
++ return self._original_numerator
+
+- def __add__(self, other):
+- if isinstance(other, int):
+- other = Fraction(other, 1)
+- return Fraction(
+- self.numerator * other.denominator + other.numerator * self.denominator,
+- self.denominator * other.denominator,
+- )
++ @property
++ def denominator(self):
++ if self._normalize:
++ return self._denominator
++ return self._original_denominator
+
+
+ def sentence_bleu(
+@@ -256,7 +247,7 @@ def corpus_bleu(
+
+ # Collects the various precision values for the different ngram orders.
+ p_n = [
+- Fraction(p_numerators[i], p_denominators[i])
++ Fraction(p_numerators[i], p_denominators[i], _normalize=False)
+ for i in range(1, max_weight_length + 1)
+ ]
+
+@@ -399,7 +390,7 @@ def modified_precision(references, hypothesis, n):
+ # Usually this happens when the ngram order is > len(reference).
+ denominator = max(1, sum(counts.values()))
+
+- return Fraction(numerator, denominator)
++ return Fraction(numerator, denominator, _normalize=False)
+
+
+ def closest_ref_length(references, hyp_len):
+@@ -611,7 +602,8 @@ class SmoothingFunction:
+ In COLING 2004.
+ """
+ return [
+- Fraction(p_n[i].numerator + 1, p_n[i].denominator + 1) if i != 0 else p_n[0]
++ Fraction(p_n[i].numerator + 1, p_n[i].denominator + 1, _normalize=False)
++ if i != 0 else p_n[0]
+ for i in range(len(p_n))
+ ]
+
diff --git a/dev-python/nltk/metadata.xml b/dev-python/nltk/metadata.xml
new file mode 100644
index 0000000..622f82f
--- /dev/null
+++ b/dev-python/nltk/metadata.xml
@@ -0,0 +1,14 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE pkgmetadata SYSTEM "https://www.gentoo.org/dtd/metadata.dtd">
+<pkgmetadata>
+ <maintainer type="person">
+ <email>marcin.deranek@slonko.net</email>
+ <name>Marcin Deranek</name>
+ </maintainer>
+ <stabilize-allarches/>
+ <upstream>
+ <remote-id type="google-code">nltk</remote-id>
+ <remote-id type="pypi">nltk</remote-id>
+ <remote-id type="github">nltk/nltk</remote-id>
+ </upstream>
+</pkgmetadata>
diff --git a/dev-python/nltk/nltk-3.7.ebuild b/dev-python/nltk/nltk-3.7.ebuild
new file mode 100644
index 0000000..1abb630
--- /dev/null
+++ b/dev-python/nltk/nltk-3.7.ebuild
@@ -0,0 +1,62 @@
+# Copyright 1999-2023 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+EAPI=8
+
+DISTUTILS_USE_PEP517=setuptools
+PYTHON_COMPAT=( python3_{9..11} )
+PYTHON_REQ_USE="sqlite,tk?,xml(+)"
+
+inherit distutils-r1
+
+DESCRIPTION="Natural Language Toolkit"
+HOMEPAGE="https://www.nltk.org/ https://github.com/nltk/nltk/"
+SRC_URI="https://github.com/nltk/nltk/archive/${PV}.tar.gz -> ${P}.gh.tar.gz"
+
+LICENSE="Apache-2.0"
+SLOT="0"
+KEYWORDS="amd64 ~ppc64 ~riscv x86 ~amd64-linux ~x86-linux ~ppc-macos ~x64-macos"
+IUSE="tk"
+
+RDEPEND="
+ dev-python/click[${PYTHON_USEDEP}]
+ dev-python/joblib[${PYTHON_USEDEP}]
+ dev-python/regex[${PYTHON_USEDEP}]
+ dev-python/tqdm[${PYTHON_USEDEP}]
+"
+BDEPEND="
+ test? (
+ dev-python/joblib[${PYTHON_USEDEP}]
+ >=dev-python/nltk-data-20211221
+ dev-python/numpy[${PYTHON_USEDEP}]
+ dev-python/pyparsing[${PYTHON_USEDEP}]
+ dev-python/pytest-mock[${PYTHON_USEDEP}]
+ dev-python/twython[${PYTHON_USEDEP}]
+ dev-python/scikit-learn[${PYTHON_USEDEP}]
+ dev-python/scipy[${PYTHON_USEDEP}]
+ dev-python/matplotlib[${PYTHON_USEDEP}]
+ )"
+PDEPEND="dev-python/nltk-data"
+
+distutils_enable_tests pytest
+
+EPYTEST_DESELECT=(
+ # Internet
+ unit/test_downloader.py::test_downloader_using_existing_parent_download_dir
+ unit/test_downloader.py::test_downloader_using_non_existing_parent_download_dir
+)
+
+src_prepare() {
+ # requires unpackaged pycrfsuite
+ sed -i -e '/>>>/s@$@ # doctest: +SKIP@' nltk/tag/crf.py || die
+ # replace fetching from network with duplicate file URL
+ sed -e 's@https://raw.githubusercontent.com/nltk/nltk/develop/nltk/test/toy.cfg@nltk:grammars/sample_grammars/toy.cfg@' \
+ -i nltk/test/data.doctest || die
+
+ distutils-r1_src_prepare
+}
+
+src_test() {
+ cd nltk/test || die
+ distutils-r1_src_test
+}
diff --git a/dev-python/nltk/nltk-3.8.1.ebuild b/dev-python/nltk/nltk-3.8.1.ebuild
new file mode 100644
index 0000000..a37e7f5
--- /dev/null
+++ b/dev-python/nltk/nltk-3.8.1.ebuild
@@ -0,0 +1,49 @@
+# Copyright 1999-2023 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+EAPI=8
+
+DISTUTILS_USE_PEP517=setuptools
+PYTHON_COMPAT=( python3_{10..12} )
+PYTHON_REQ_USE="sqlite,tk?,xml(+)"
+
+inherit distutils-r1
+
+DESCRIPTION="Natural Language Toolkit"
+HOMEPAGE="https://www.nltk.org/ https://github.com/nltk/nltk/"
+SRC_URI="https://github.com/nltk/nltk/archive/${PV}.tar.gz -> ${P}.gh.tar.gz"
+
+LICENSE="Apache-2.0"
+SLOT="0"
+KEYWORDS="amd64 ~ppc64 ~riscv x86 ~amd64-linux ~x86-linux ~ppc-macos ~x64-macos"
+IUSE="tk"
+
+RDEPEND="
+ dev-python/click[${PYTHON_USEDEP}]
+ dev-python/joblib[${PYTHON_USEDEP}]
+ dev-python/regex[${PYTHON_USEDEP}]
+ dev-python/tqdm[${PYTHON_USEDEP}]
+"
+BDEPEND="
+ test? (
+ dev-python/joblib[${PYTHON_USEDEP}]
+ >=dev-python/nltk-data-20211221
+ dev-python/numpy[${PYTHON_USEDEP}]
+ dev-python/pyparsing[${PYTHON_USEDEP}]
+ dev-python/pytest-mock[${PYTHON_USEDEP}]
+ dev-python/twython[${PYTHON_USEDEP}]
+ dev-python/scikit-learn[${PYTHON_USEDEP}]
+ dev-python/scipy[${PYTHON_USEDEP}]
+ dev-python/matplotlib[${PYTHON_USEDEP}]
+ )"
+PDEPEND="dev-python/nltk-data"
+
+PATCHES=( "${FILESDIR}/python-3.12.patch" )
+
+distutils_enable_tests pytest
+
+EPYTEST_DESELECT=(
+ # Internet
+ nltk/test/unit/test_downloader.py::test_downloader_using_existing_parent_download_dir
+ nltk/test/unit/test_downloader.py::test_downloader_using_non_existing_parent_download_dir
+)