diff --git a/.gitignore b/.gitignore index 6b070ba..eb51358 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,55 @@ +<<<<<<< HEAD +# These are some examples of commonly ignored file patterns. +# You should customize this list as applicable to your project. +# Learn more about .gitignore: +# https://www.atlassian.com/git/tutorials/saving-changes/gitignore + +# Node artifact files +node_modules/ +dist/ + +# Compiled Java class files +*.class + +# Compiled Python bytecode +*.py[cod] + +# Log files +*.log + +# Package files +*.jar + +# Maven +target/ +dist/ + +# JetBrains IDE +.idea/ + +# Unit test reports +TEST*.xml + +# Generated by MacOS +.DS_Store + +# Generated by Windows +Thumbs.db + +# Applications +*.app +*.exe +*.war + +# Large media files +*.mp4 +*.tiff +*.avi +*.flv +*.mov +*.wmv + +======= # Temporary and binary files *~ *.py[cod] @@ -55,3 +107,4 @@ MANIFEST extern/rds2cpp* src/rds2py/lib/parser.cpp +>>>>>>> bd-fix-windows diff --git a/CMakeFiles/CMakeSystem.cmake b/CMakeFiles/CMakeSystem.cmake new file mode 100644 index 0000000..1a46f66 --- /dev/null +++ b/CMakeFiles/CMakeSystem.cmake @@ -0,0 +1,15 @@ +set(CMAKE_HOST_SYSTEM "Windows") +set(CMAKE_HOST_SYSTEM_NAME "Windows") +set(CMAKE_HOST_SYSTEM_VERSION "") +set(CMAKE_HOST_SYSTEM_PROCESSOR "AMD64") + + + +set(CMAKE_SYSTEM "Windows") +set(CMAKE_SYSTEM_NAME "Windows") +set(CMAKE_SYSTEM_VERSION "") +set(CMAKE_SYSTEM_PROCESSOR "AMD64") + +set(CMAKE_CROSSCOMPILING "FALSE") + +set(CMAKE_SYSTEM_LOADED 1) diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 14470b7..71337a7 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -3,11 +3,36 @@ cmake_minimum_required(VERSION 3.24) project(rds2py VERSION 1.0.0 DESCRIPTION "Building the rds shared library" - LANGUAGES CXX) + LANGUAGES C CXX) # Importing all of the dependencies with pinned versions (even for transitive dependencies). include(FetchContent) +# On Windows (or when zlib is not found), build zlib from source so that +# byteme can use GzipFileReader / GzipFileWriter. +find_package(ZLIB QUIET) +if(NOT ZLIB_FOUND) + message(STATUS "System zlib not found -- building zlib from source via FetchContent") + FetchContent_Declare( + zlib + GIT_REPOSITORY https://github.com/madler/zlib + GIT_TAG v1.3.1 + ) + set(ZLIB_BUILD_EXAMPLES OFF CACHE BOOL "" FORCE) + FetchContent_MakeAvailable(zlib) + + # Make the zlib headers visible to downstream targets and set the + # ZLIB_FOUND / ZLIB::ZLIB variables that byteme's CMakeLists.txt looks for. + add_library(ZLIB::ZLIB ALIAS zlibstatic) + target_include_directories(zlibstatic PUBLIC + "${zlib_SOURCE_DIR}" + "${zlib_BINARY_DIR}" + ) + set(ZLIB_FOUND TRUE CACHE BOOL "" FORCE) + set(ZLIB_LIBRARIES zlibstatic CACHE STRING "" FORCE) + set(ZLIB_INCLUDE_DIRS "${zlib_SOURCE_DIR};${zlib_BINARY_DIR}" CACHE STRING "" FORCE) +endif() + FetchContent_Declare( rds2cpp GIT_REPOSITORY https://github.com/LTLA/rds2cpp @@ -44,6 +69,11 @@ set_property(TARGET ${TARGET} PROPERTY CXX_STANDARD 17) target_link_libraries(${TARGET} PRIVATE rds2cpp pybind11::pybind11) +# On Windows, also link zlib so the gzip symbols are available at link time. +if(NOT ZLIB_FOUND OR TARGET zlibstatic) + target_link_libraries(${TARGET} PRIVATE ZLIB::ZLIB) +endif() + set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME lib_rds_parser PREFIX "" diff --git a/lib/src/rdswrapper.cpp b/lib/src/rdswrapper.cpp index 2ed6aa2..c5f40f8 100644 --- a/lib/src/rdswrapper.cpp +++ b/lib/src/rdswrapper.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include namespace py = pybind11; @@ -10,15 +11,16 @@ namespace py = pybind11; class RdsReader { private: const rds2cpp::RObject* ptr; + const std::vector* symbols; public: - RdsReader(const rds2cpp::RObject* p) : ptr(p) { + RdsReader(const rds2cpp::RObject* p, const std::vector* syms) : ptr(p), symbols(syms) { if (!p) throw std::runtime_error("Null pointer passed to 'RdsReader'."); + if (!syms) throw std::runtime_error("Null symbols pointer passed to 'RdsReader'."); } std::string get_rtype() const { if (!ptr) throw std::runtime_error("Null pointer in 'get_rtype'."); - // py::print("arg::", static_cast(ptr->type())); switch (ptr->type()) { case rds2cpp::SEXPType::S4: return "S4"; case rds2cpp::SEXPType::INT: return "integer"; @@ -69,23 +71,36 @@ class RdsReader { throw std::runtime_error("Invalid type for 'string_arr'"); } const auto& data = static_cast(ptr)->data; - return py::cast(data); + py::list result; + for (const auto& s : data) { + if (s.value.has_value()) { + result.append(py::str(s.value.value())); + } else { + result.append(py::none()); + } + } + return result; } py::list get_attribute_names() const { if (!ptr) throw std::runtime_error("Null pointer in 'get_attribute_names'"); - return py::cast(get_attributes().names); + const auto& attrs = get_attributes(); + py::list names; + for (const auto& attr : attrs) { + names.append((*symbols)[attr.name.index].name); + } + return names; } py::object load_attribute_by_name(const std::string& name) const { if (!ptr) throw std::runtime_error("Null pointer in 'load_attribute_by_name'"); - const auto& attributes = get_attributes(); - auto it = std::find(attributes.names.begin(), attributes.names.end(), name); - if (it == attributes.names.end()) { - throw std::runtime_error("Attribute not found: " + name); + const auto& attrs = get_attributes(); + for (const auto& attr : attrs) { + if ((*symbols)[attr.name.index].name == name) { + return py::cast(new RdsReader(attr.value.get(), symbols)); + } } - size_t index = std::distance(attributes.names.begin(), it); - return py::cast(new RdsReader(attributes.values[index].get())); + throw std::runtime_error("Attribute not found: " + name); } py::object load_vec_element(int index) const { @@ -97,7 +112,7 @@ class RdsReader { if (index < 0 || static_cast(index) >= data.size()) { throw std::out_of_range("Vector index out of range"); } - return py::cast(new RdsReader(data[index].get())); + return py::cast(new RdsReader(data[index].get(), symbols)); } std::string get_package_name() const { @@ -126,7 +141,7 @@ class RdsReader { } private: - const rds2cpp::Attributes& get_attributes() const { + const std::vector& get_attributes() const { if (!ptr) throw std::runtime_error("Null pointer in get_attributes"); switch (ptr->type()) { case rds2cpp::SEXPType::INT: return static_cast(ptr)->attributes; @@ -142,18 +157,18 @@ class RdsReader { class RdsObject { private: - std::unique_ptr parsed; + std::unique_ptr parsed; std::unique_ptr reader; public: RdsObject(const std::string& file) { try { rds2cpp::ParseRdsOptions options; - parsed = std::make_unique(rds2cpp::parse_rds(file, options)); + parsed = std::make_unique(rds2cpp::parse_rds(file, options)); if (!parsed || !parsed->object) { throw std::runtime_error("Failed to parse RDS file"); } - reader = std::make_unique(parsed->object.get()); + reader = std::make_unique(parsed->object.get(), &parsed->symbols); } catch (const std::exception& e) { throw std::runtime_error(std::string("Error in 'RdsObject' constructor: ") + e.what()); } @@ -181,38 +196,31 @@ class RdaObject { py::list get_object_names() const { if (!parsed) throw std::runtime_error("Null parsed in 'get_object_names'"); - const auto& pairlist = parsed->contents; py::list names; - for (size_t i = 0; i < pairlist.tag_names.size(); ++i) { - if (pairlist.has_tag[i]) { - names.append(pairlist.tag_names[i]); - } else { - names.append(py::none()); - } + for (const auto& obj : parsed->objects) { + names.append(parsed->symbols[obj.name.index].name); } return names; } int get_object_count() const { if (!parsed) throw std::runtime_error("Null parsed in 'get_object_count'"); - return static_cast(parsed->contents.data.size()); + return static_cast(parsed->objects.size()); } RdsReader* get_object_by_index(int index) const { if (!parsed) throw std::runtime_error("Null parsed in 'get_object_by_index'"); - const auto& data = parsed->contents.data; - if (index < 0 || static_cast(index) >= data.size()) { + if (index < 0 || static_cast(index) >= parsed->objects.size()) { throw std::out_of_range("Object index out of range"); } - return new RdsReader(data[index].get()); + return new RdsReader(parsed->objects[index].value.get(), &parsed->symbols); } RdsReader* get_object_by_name(const std::string& name) const { if (!parsed) throw std::runtime_error("Null parsed in 'get_object_by_name'"); - const auto& pairlist = parsed->contents; - for (size_t i = 0; i < pairlist.tag_names.size(); ++i) { - if (pairlist.has_tag[i] && pairlist.tag_names[i] == name) { - return new RdsReader(pairlist.data[i].get()); + for (const auto& obj : parsed->objects) { + if (parsed->symbols[obj.name.index].name == name) { + return new RdsReader(obj.value.get(), &parsed->symbols); } } throw std::runtime_error("Object not found: " + name); @@ -234,7 +242,6 @@ PYBIND11_MODULE(lib_rds_parser, m) { .def("get_object_by_name", &RdaObject::get_object_by_name, py::return_value_policy::take_ownership, py::keep_alive<0, 1>()); py::class_(m, "RdsReader") - .def(py::init()) .def("get_rtype", &RdsReader::get_rtype) .def("get_rsize", &RdsReader::get_rsize) .def("get_numeric_data", &RdsReader::get_numeric_data) diff --git a/setup.cfg b/setup.cfg index c7dbc2a..94469fc 100644 --- a/setup.cfg +++ b/setup.cfg @@ -24,7 +24,7 @@ project_urls = # Twitter = https://twitter.com/PyScaffold # Change if running only on Windows, Mac or Linux (comma-separated) -platforms = Mac, Linux +platforms = Mac, Linux, Windows # Add here all kinds of additional classifiers as defined under # https://pypi.org/classifiers/ diff --git a/setup.py b/setup.py index 8da113f..07edeb2 100644 --- a/setup.py +++ b/setup.py @@ -7,6 +7,7 @@ from setuptools import setup, Extension from setuptools.command.build_ext import build_ext as build_ext_orig +import glob import pathlib import os import shutil @@ -33,23 +34,26 @@ def build_cmake(self, ext): outpath = os.path.join(build_lib.absolute(), ext.name) build_temp = os.path.join(build_temp, "build") - if not os.path.exists(build_temp): - cmd = [ - "cmake", - "-S", - "lib", - "-B", - build_temp, - "-Dpybind11_DIR=" + os.path.join(os.path.dirname(pybind11.__file__), "share", "cmake", "pybind11"), - "-DPYTHON_EXECUTABLE=" + sys.executable, - ] - if os.name != "nt": - cmd.append("-DCMAKE_BUILD_TYPE=Release") - cmd.append("-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=" + outpath) + # Remove stale build dir to avoid FetchContent conflicts with + # leftover _deps from a previous pip build environment. + if os.path.exists(build_temp): + shutil.rmtree(build_temp, ignore_errors=True) + cmd = [ + "cmake", + "-S", + "lib", + "-B", + build_temp, + "-Dpybind11_DIR=" + os.path.join(os.path.dirname(pybind11.__file__), "share", "cmake", "pybind11"), + "-DPYTHON_EXECUTABLE=" + sys.executable, + ] + if os.name != "nt": + cmd.append("-DCMAKE_BUILD_TYPE=Release") + cmd.append("-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=" + outpath) - if "MORE_CMAKE_OPTIONS" in os.environ: - cmd += os.environ["MORE_CMAKE_OPTIONS"].split() - self.spawn(cmd) + if "MORE_CMAKE_OPTIONS" in os.environ: + cmd += os.environ["MORE_CMAKE_OPTIONS"].split() + self.spawn(cmd) if not self.dry_run: cmd = ["cmake", "--build", build_temp] @@ -59,10 +63,20 @@ def build_cmake(self, ext): if os.name == "nt": # Gave up trying to get MSVC to respect the output directory. # Delvewheel also needs it to have a 'pyd' suffix... whatever. - shutil.copyfile( - os.path.join(build_temp, "Release", "_core.dll"), - os.path.join(outpath, "_core.pyd"), - ) + # The CMake target name is lib_rds_parser; MSVC puts it under Release/. + # pybind11 may add an ABI tag (e.g. lib_rds_parser.cp312-win_amd64.pyd). + release_dir = os.path.join(build_temp, "Release") + candidates = glob.glob(os.path.join(release_dir, "lib_rds_parser*")) + if not candidates: + raise RuntimeError( + f"Cannot find compiled library in {release_dir}. " + f"Contents: {os.listdir(release_dir) if os.path.isdir(release_dir) else 'dir not found'}" + ) + # Prefer .pyd files over .lib/.exp + pyd_files = [c for c in candidates if c.endswith(".pyd")] + src_name = pyd_files[0] if pyd_files else candidates[0] + os.makedirs(outpath, exist_ok=True) + shutil.copyfile(src_name, os.path.join(outpath, "lib_rds_parser.pyd")) if __name__ == "__main__":