diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2aec3fc9..8cf6e003 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,12 +14,21 @@ jobs: matrix: python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] os: ["ubuntu-20.04", "macos-latest", "windows-latest"] + unsigned: [true, false] steps: - uses: actions/checkout@v3 # Pull the repository - uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} + + - name: Use unsigned indices + if: ${{ matrix.unsigned }} + run: | + cat src/annoymodule.cc | sed "s/.*\(#define ANNOYLIB_UNSIGNED_INDEX\)/\1/" > .tmp + cmp .tmp src/annoymodule.cc && exit 1 # ensure a change was actually made. + mv .tmp src/annoymodule.cc + - run: pip install . - run: pip install h5py numpy pytest - run: pytest -v diff --git a/setup.py b/setup.py index aa5cbe31..d5d395d1 100644 --- a/setup.py +++ b/setup.py @@ -48,6 +48,7 @@ if os.name != 'nt': extra_compile_args += ['-O3', '-ffast-math', '-fno-associative-math'] + extra_compile_args += ['-Wall', '-Wpedantic', '-Wextra'] # Add multithreaded build flag for all platforms using Python 3 and # for non-Windows Python 2 platforms diff --git a/src/annoylib.h b/src/annoylib.h index 657977cb..954fcb59 100644 --- a/src/annoylib.h +++ b/src/annoylib.h @@ -408,17 +408,29 @@ struct Base { static inline void preprocess(void* nodes, size_t _s, const S node_count, const int f) { // Override this in specific metric structs below if you need to do any pre-processing // on the entire set of nodes passed into this index. + + (void)nodes; // silence unused variable warnings. + (void)_s; + (void)node_count; + (void)f; } template static inline void postprocess(void* nodes, size_t _s, const S node_count, const int f) { // Override this in specific metric structs below if you need to do any post-processing // on the entire set of nodes passed into this index. + + (void)nodes; // silence unused variable warnings. + (void)_s; + (void)node_count; + (void)f; } template static inline void zero_value(Node* dest) { // Initialize any fields that require sane defaults within this node. + + (void)dest; // silence unused variable warnings. } template @@ -695,6 +707,7 @@ struct DotProduct : Angular { template static inline void postprocess(void* nodes, size_t _s, const S node_count, const int f) { + (void)f; // silence unused variable warnings. for (S i = 0; i < node_count; i++) { Node* node = get_node_ptr(nodes, _s, i); // When an index is built, we will remember it in index item nodes to compute distances differently @@ -743,12 +756,14 @@ struct Hamming : Base { } template static inline bool margin(const Node* n, const T* y, int f) { + (void)f; // silence unused variable warnings. static const size_t n_bits = sizeof(T) * 8; T chunk = n->v[0] / n_bits; return (y[chunk] & (static_cast(1) << (n_bits - 1 - (n->v[0] % n_bits)))) != 0; } template static inline bool side(const Node* n, const T* y, int f, Random& random) { + (void)random; // silence unused variable warnings. return margin(n, y, f); } template @@ -757,6 +772,7 @@ struct Hamming : Base { } template static inline void create_split(const vector*>& nodes, int f, size_t s, Random& random, Node* n) { + (void)s; // silence unused variable warnings. size_t cur_size = 0; size_t i = 0; int dim = f * 8 * sizeof(T); @@ -796,6 +812,8 @@ struct Hamming : Base { } template static inline void init_node(Node* n, int f) { + (void)n; // silence unused variable warnings. + (void)f; } static const char* name() { return "hamming"; @@ -864,6 +882,8 @@ struct Euclidean : Minkowski { } template static inline void init_node(Node* n, int f) { + (void)n; // silence unused variable warnings. + (void)f; } static const char* name() { return "euclidean"; @@ -895,6 +915,8 @@ struct Manhattan : Minkowski { } template static inline void init_node(Node* n, int f) { + (void)n; // silence unused variable warnings. + (void)f; } static const char* name() { return "manhattan"; @@ -1202,9 +1224,10 @@ template= 0; i--) { + for (S x = _n_nodes; x > 0; x--) { // S may be unsigned, so don't use >= 0 to terminate + S i = x - 1; S k = _get(i)->n_descendants; - if (m == -1 || k == m) { + if (m == static_cast(-1) || k == m) { // first condition is still valid if S is unsigned, as m would be the largest possible number of descendants _roots.push_back(i); m = k; } else { @@ -1483,7 +1506,7 @@ template > nns_dist; - S last = -1; + S last = -1; // Safe sentinel for unsigned S; all indices must be less than the max value of S, otherwise get_n_items() would overflow. for (size_t i = 0; i < nns.size(); i++) { S j = nns[i]; if (j == last) @@ -1508,6 +1531,7 @@ class AnnoyIndexSingleThreadedBuildPolicy { public: template static void build(AnnoyIndex* annoy, int q, int n_threads) { + (void)n_threads; // silence unused variable warnings. AnnoyIndexSingleThreadedBuildPolicy threaded_build_policy; annoy->thread_build(q, 0, threaded_build_policy); } diff --git a/src/annoymodule.cc b/src/annoymodule.cc index 6bb0ae1b..91744f96 100644 --- a/src/annoymodule.cc +++ b/src/annoymodule.cc @@ -62,31 +62,40 @@ using namespace Annoy; typedef AnnoyIndexSingleThreadedBuildPolicy AnnoyIndexThreadedBuildPolicy; #endif -template class Annoy::AnnoyIndexInterface; +//#define ANNOYLIB_UNSIGNED_INDEX +#ifdef ANNOYLIB_UNSIGNED_INDEX +#define Index_t uint32_t +#define INDEXF "I" +#else +#define Index_t int32_t +#define INDEXF "i" +#endif + +template class Annoy::AnnoyIndexInterface; -class HammingWrapper : public AnnoyIndexInterface { +class HammingWrapper : public AnnoyIndexInterface { // Wrapper class for Hamming distance, using composition. // This translates binary (float) vectors into packed uint64_t vectors. // This is questionable from a performance point of view. Should reconsider this solution. private: - int32_t _f_external, _f_internal; - AnnoyIndex _index; + Index_t _f_external, _f_internal; + AnnoyIndex _index; void _pack(const float* src, uint64_t* dst) const { - for (int32_t i = 0; i < _f_internal; i++) { + for (Index_t i = 0; i < _f_internal; i++) { dst[i] = 0; - for (int32_t j = 0; j < 64 && i*64+j < _f_external; j++) { + for (Index_t j = 0; j < 64 && i*64+j < _f_external; j++) { dst[i] |= (uint64_t)(src[i * 64 + j] > 0.5) << j; } } }; void _unpack(const uint64_t* src, float* dst) const { - for (int32_t i = 0; i < _f_external; i++) { + for (Index_t i = 0; i < _f_external; i++) { dst[i] = (src[i / 64] >> (i % 64)) & 1; } }; public: HammingWrapper(int f) : _f_external(f), _f_internal((f + 63) / 64), _index((f + 63) / 64) {}; - bool add_item(int32_t item, const float* w, char**error) { + bool add_item(Index_t item, const float* w, char**error) { vector w_internal(_f_internal, 0); _pack(w, &w_internal[0]); return _index.add_item(item, &w_internal[0], error); @@ -96,8 +105,8 @@ class HammingWrapper : public AnnoyIndexInterface { bool save(const char* filename, bool prefault, char** error) { return _index.save(filename, prefault, error); }; void unload() { _index.unload(); }; bool load(const char* filename, bool prefault, char** error) { return _index.load(filename, prefault, error); }; - float get_distance(int32_t i, int32_t j) const { return _index.get_distance(i, j); }; - void get_nns_by_item(int32_t item, size_t n, int search_k, vector* result, vector* distances) const { + float get_distance(Index_t i, Index_t j) const { return _index.get_distance(i, j); }; + void get_nns_by_item(Index_t item, size_t n, int search_k, vector* result, vector* distances) const { if (distances) { vector distances_internal; _index.get_nns_by_item(item, n, search_k, result, &distances_internal); @@ -106,7 +115,7 @@ class HammingWrapper : public AnnoyIndexInterface { _index.get_nns_by_item(item, n, search_k, result, NULL); } }; - void get_nns_by_vector(const float* w, size_t n, int search_k, vector* result, vector* distances) const { + void get_nns_by_vector(const float* w, size_t n, int search_k, vector* result, vector* distances) const { vector w_internal(_f_internal, 0); _pack(w, &w_internal[0]); if (distances) { @@ -117,10 +126,10 @@ class HammingWrapper : public AnnoyIndexInterface { _index.get_nns_by_vector(&w_internal[0], n, search_k, result, NULL); } }; - int32_t get_n_items() const { return _index.get_n_items(); }; - int32_t get_n_trees() const { return _index.get_n_trees(); }; + Index_t get_n_items() const { return _index.get_n_items(); }; + Index_t get_n_trees() const { return _index.get_n_trees(); }; void verbose(bool v) { _index.verbose(v); }; - void get_item(int32_t item, float* v) const { + void get_item(Index_t item, float* v) const { vector v_internal(_f_internal, 0); _index.get_item(item, &v_internal[0]); _unpack(&v_internal[0], v); @@ -133,7 +142,7 @@ class HammingWrapper : public AnnoyIndexInterface { typedef struct { PyObject_HEAD int f; - AnnoyIndexInterface* ptr; + AnnoyIndexInterface* ptr; } py_annoy; @@ -152,17 +161,17 @@ py_an_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) { // This keeps coming up, see #368 etc PyErr_WarnEx(PyExc_FutureWarning, "The default argument for metric will be removed " "in future version of Annoy. Please pass metric='angular' explicitly.", 1); - self->ptr = new AnnoyIndex(self->f); + self->ptr = new AnnoyIndex(self->f); } else if (!strcmp(metric, "angular")) { - self->ptr = new AnnoyIndex(self->f); + self->ptr = new AnnoyIndex(self->f); } else if (!strcmp(metric, "euclidean")) { - self->ptr = new AnnoyIndex(self->f); + self->ptr = new AnnoyIndex(self->f); } else if (!strcmp(metric, "manhattan")) { - self->ptr = new AnnoyIndex(self->f); + self->ptr = new AnnoyIndex(self->f); } else if (!strcmp(metric, "hamming")) { self->ptr = new HammingWrapper(self->f); } else if (!strcmp(metric, "dot")) { - self->ptr = new AnnoyIndex(self->f); + self->ptr = new AnnoyIndex(self->f); } else { PyErr_SetString(PyExc_ValueError, "No such metric"); return NULL; @@ -174,6 +183,8 @@ py_an_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) { static int py_an_init(py_annoy *self, PyObject *args, PyObject *kwargs) { + (void)self; // silence unused variable warnings. + // Seems to be needed for Python 3 const char *metric = NULL; int f; @@ -237,7 +248,7 @@ py_an_save(py_annoy *self, PyObject *args, PyObject *kwargs) { PyObject* -get_nns_to_python(const vector& result, const vector& distances, int include_distances) { +get_nns_to_python(const vector& result, const vector& distances, int include_distances) { PyObject* l = NULL; PyObject* d = NULL; PyObject* t = NULL; @@ -283,11 +294,22 @@ get_nns_to_python(const vector& result, const vector& distances, } -bool check_constraints(py_annoy *self, int32_t item, bool building) { +bool check_constraints(py_annoy *self, Index_t item, bool building) { +#ifdef ANNOYLIB_UNSIGNED_INDEX + // Special cased for unit tests, and besides, no item should have an index equal to the maximum, + // otherwise get_n_items() would not be representable. + if (item == static_cast(-1)) { + PyErr_SetString(PyExc_IndexError, "Item index out of range"); + return false; + } +#else if (item < 0) { PyErr_SetString(PyExc_IndexError, "Item index can not be negative"); return false; - } else if (!building && item >= self->ptr->get_n_items()) { + } +#endif + + if (!building && item >= self->ptr->get_n_items()) { PyErr_SetString(PyExc_IndexError, "Item index larger than the largest item index"); return false; } else { @@ -297,19 +319,19 @@ bool check_constraints(py_annoy *self, int32_t item, bool building) { static PyObject* py_an_get_nns_by_item(py_annoy *self, PyObject *args, PyObject *kwargs) { - int32_t item, n, search_k=-1, include_distances=0; + Index_t item, n, search_k=-1, include_distances=0; if (!self->ptr) return NULL; static char const * kwlist[] = {"i", "n", "search_k", "include_distances", NULL}; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "ii|ii", (char**)kwlist, &item, &n, &search_k, &include_distances)) + if (!PyArg_ParseTupleAndKeywords(args, kwargs, INDEXF INDEXF "|" INDEXF INDEXF, (char**)kwlist, &item, &n, &search_k, &include_distances)) return NULL; if (!check_constraints(self, item, false)) { return NULL; } - vector result; + vector result; vector distances; Py_BEGIN_ALLOW_THREADS; @@ -354,12 +376,12 @@ convert_list_to_vector(PyObject* v, int f, vector* w) { static PyObject* py_an_get_nns_by_vector(py_annoy *self, PyObject *args, PyObject *kwargs) { PyObject* v; - int32_t n, search_k=-1, include_distances=0; + Index_t n, search_k=-1, include_distances=0; if (!self->ptr) return NULL; static char const * kwlist[] = {"vector", "n", "search_k", "include_distances", NULL}; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "Oi|ii", (char**)kwlist, &v, &n, &search_k, &include_distances)) + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O" INDEXF "|" INDEXF INDEXF, (char**)kwlist, &v, &n, &search_k, &include_distances)) return NULL; vector w(self->f); @@ -367,7 +389,7 @@ py_an_get_nns_by_vector(py_annoy *self, PyObject *args, PyObject *kwargs) { return NULL; } - vector result; + vector result; vector distances; Py_BEGIN_ALLOW_THREADS; @@ -380,10 +402,10 @@ py_an_get_nns_by_vector(py_annoy *self, PyObject *args, PyObject *kwargs) { static PyObject* py_an_get_item_vector(py_annoy *self, PyObject *args) { - int32_t item; + Index_t item; if (!self->ptr) return NULL; - if (!PyArg_ParseTuple(args, "i", &item)) + if (!PyArg_ParseTuple(args, INDEXF, &item)) return NULL; if (!check_constraints(self, item, false)) { @@ -415,11 +437,11 @@ py_an_get_item_vector(py_annoy *self, PyObject *args) { static PyObject* py_an_add_item(py_annoy *self, PyObject *args, PyObject* kwargs) { PyObject* v; - int32_t item; + Index_t item; if (!self->ptr) return NULL; static char const * kwlist[] = {"i", "vector", NULL}; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "iO", (char**)kwlist, &item, &v)) + if (!PyArg_ParseTupleAndKeywords(args, kwargs, INDEXF "O", (char**)kwlist, &item, &v)) return NULL; if (!check_constraints(self, item, true)) { @@ -511,10 +533,10 @@ py_an_unload(py_annoy *self) { static PyObject * py_an_get_distance(py_annoy *self, PyObject *args) { - int32_t i, j; + Index_t i, j; if (!self->ptr) return NULL; - if (!PyArg_ParseTuple(args, "ii", &i, &j)) + if (!PyArg_ParseTuple(args, INDEXF INDEXF, &i, &j)) return NULL; if (!check_constraints(self, i, false) || !check_constraints(self, j, false)) { @@ -531,7 +553,7 @@ py_an_get_n_items(py_annoy *self) { if (!self->ptr) return NULL; - int32_t n = self->ptr->get_n_items(); + Index_t n = self->ptr->get_n_items(); return PyInt_FromLong(n); } @@ -540,7 +562,7 @@ py_an_get_n_trees(py_annoy *self) { if (!self->ptr) return NULL; - int32_t n = self->ptr->get_n_trees(); + Index_t n = self->ptr->get_n_trees(); return PyInt_FromLong(n); }