Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,55 @@
# These are some examples of commonly ignored file patterns.
# You should customize this list as applicable to your project.
# Learn more about .gitignore:
# https://www.atlassian.com/git/tutorials/saving-changes/gitignore

# Node artifact files
node_modules/
dist/

# Compiled Java class files
*.class

# Compiled Python bytecode
*.py[cod]

# Log files
*.log

# Package files
*.jar

# Maven
target/

# JetBrains IDE
.idea/

# Unit test reports
TEST*.xml

# Generated by MacOS
.DS_Store

# Generated by Windows
Thumbs.db

# Applications
*.app
*.exe
*.war

# Large media files
*.mp4
*.tiff
*.avi
*.flv
*.mov
*.wmv

# Temporary and binary files
*~
*.py[cod]
Expand Down Expand Up @@ -55,3 +107,4 @@ MANIFEST

extern/rds2cpp*
src/rds2py/lib/parser.cpp
15 changes: 15 additions & 0 deletions CMakeFiles/CMakeSystem.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
set(CMAKE_HOST_SYSTEM "Windows")
set(CMAKE_HOST_SYSTEM_NAME "Windows")
set(CMAKE_HOST_SYSTEM_VERSION "")
set(CMAKE_HOST_SYSTEM_PROCESSOR "AMD64")



set(CMAKE_SYSTEM "Windows")
set(CMAKE_SYSTEM_NAME "Windows")
set(CMAKE_SYSTEM_VERSION "")
set(CMAKE_SYSTEM_PROCESSOR "AMD64")

set(CMAKE_CROSSCOMPILING "FALSE")

set(CMAKE_SYSTEM_LOADED 1)
32 changes: 31 additions & 1 deletion lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,36 @@ cmake_minimum_required(VERSION 3.24)
project(rds2py
VERSION 1.0.0
DESCRIPTION "Building the rds shared library"
LANGUAGES CXX)
LANGUAGES C CXX)

# Importing all of the dependencies with pinned versions (even for transitive dependencies).
include(FetchContent)

# Probe for a system zlib quietly (no error if it is missing). When none is
# found -- the situation this was added for is Windows -- fetch zlib v1.3.1
# and build it from source so that byteme can use GzipFileReader /
# GzipFileWriter.
find_package(ZLIB QUIET)
if(NOT ZLIB_FOUND)
message(STATUS "System zlib not found -- building zlib from source via FetchContent")
FetchContent_Declare(
zlib
GIT_REPOSITORY https://github.com/madler/zlib
GIT_TAG v1.3.1
)
# Presumably turns off zlib's example programs; only the library is used here.
set(ZLIB_BUILD_EXAMPLES OFF CACHE BOOL "" FORCE)
FetchContent_MakeAvailable(zlib)

# Expose the static zlib build under the ZLIB::ZLIB alias target, publish its
# source and binary directories as PUBLIC include paths, and cache the
# ZLIB_FOUND / ZLIB_LIBRARIES / ZLIB_INCLUDE_DIRS variables that byteme's
# CMakeLists.txt looks for.
# NOTE(review): the binary directory is presumably needed for a generated
# header (zconf.h) -- confirm.
# NOTE(review): find_package() above may leave a normal (non-cache) ZLIB_FOUND
# variable in this scope that shadows the cache entry set below; confirm that
# consumers actually observe TRUE.
add_library(ZLIB::ZLIB ALIAS zlibstatic)
target_include_directories(zlibstatic PUBLIC
"${zlib_SOURCE_DIR}"
"${zlib_BINARY_DIR}"
)
set(ZLIB_FOUND TRUE CACHE BOOL "" FORCE)
set(ZLIB_LIBRARIES zlibstatic CACHE STRING "" FORCE)
set(ZLIB_INCLUDE_DIRS "${zlib_SOURCE_DIR};${zlib_BINARY_DIR}" CACHE STRING "" FORCE)
endif()

FetchContent_Declare(
rds2cpp
GIT_REPOSITORY https://github.com/LTLA/rds2cpp
Expand Down Expand Up @@ -44,6 +69,11 @@ set_property(TARGET ${TARGET} PROPERTY CXX_STANDARD 17)

target_link_libraries(${TARGET} PRIVATE rds2cpp pybind11::pybind11)

# Link ZLIB::ZLIB directly whenever ZLIB_FOUND is false or the zlibstatic
# target exists (i.e. zlib was built from source, which is the Windows case
# this was added for), so the gzip symbols are available at link time.
if(NOT ZLIB_FOUND OR TARGET zlibstatic)
target_link_libraries(${TARGET} PRIVATE ZLIB::ZLIB)
endif()

set_target_properties(${TARGET} PROPERTIES
OUTPUT_NAME lib_rds_parser
PREFIX ""
Expand Down
69 changes: 38 additions & 31 deletions lib/src/rdswrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,24 @@
#include <pybind11/numpy.h>
#include <rds2cpp/rds2cpp.hpp>
#include <stdexcept>
#include <algorithm>
#include <pybind11/iostream.h>

namespace py = pybind11;

class RdsReader {
private:
const rds2cpp::RObject* ptr;
const std::vector<rds2cpp::Symbol>* symbols;

public:
RdsReader(const rds2cpp::RObject* p) : ptr(p) {
RdsReader(const rds2cpp::RObject* p, const std::vector<rds2cpp::Symbol>* syms) : ptr(p), symbols(syms) {
if (!p) throw std::runtime_error("Null pointer passed to 'RdsReader'.");
if (!syms) throw std::runtime_error("Null symbols pointer passed to 'RdsReader'.");
}

std::string get_rtype() const {
if (!ptr) throw std::runtime_error("Null pointer in 'get_rtype'.");
// py::print("arg::", static_cast<int>(ptr->type()));
switch (ptr->type()) {
case rds2cpp::SEXPType::S4: return "S4";
case rds2cpp::SEXPType::INT: return "integer";
Expand Down Expand Up @@ -69,23 +71,36 @@ class RdsReader {
throw std::runtime_error("Invalid type for 'string_arr'");
}
const auto& data = static_cast<const rds2cpp::StringVector*>(ptr)->data;
return py::cast(data);
py::list result;
for (const auto& s : data) {
if (s.value.has_value()) {
result.append(py::str(s.value.value()));
} else {
result.append(py::none());
}
}
return result;
}

py::list get_attribute_names() const {
if (!ptr) throw std::runtime_error("Null pointer in 'get_attribute_names'");
return py::cast(get_attributes().names);
const auto& attrs = get_attributes();
py::list names;
for (const auto& attr : attrs) {
names.append((*symbols)[attr.name.index].name);
}
return names;
}

py::object load_attribute_by_name(const std::string& name) const {
if (!ptr) throw std::runtime_error("Null pointer in 'load_attribute_by_name'");
const auto& attributes = get_attributes();
auto it = std::find(attributes.names.begin(), attributes.names.end(), name);
if (it == attributes.names.end()) {
throw std::runtime_error("Attribute not found: " + name);
const auto& attrs = get_attributes();
for (const auto& attr : attrs) {
if ((*symbols)[attr.name.index].name == name) {
return py::cast(new RdsReader(attr.value.get(), symbols));
}
}
size_t index = std::distance(attributes.names.begin(), it);
return py::cast(new RdsReader(attributes.values[index].get()));
throw std::runtime_error("Attribute not found: " + name);
}

py::object load_vec_element(int index) const {
Expand All @@ -97,7 +112,7 @@ class RdsReader {
if (index < 0 || static_cast<size_t>(index) >= data.size()) {
throw std::out_of_range("Vector index out of range");
}
return py::cast(new RdsReader(data[index].get()));
return py::cast(new RdsReader(data[index].get(), symbols));
}

std::string get_package_name() const {
Expand Down Expand Up @@ -126,7 +141,7 @@ class RdsReader {
}

private:
const rds2cpp::Attributes& get_attributes() const {
const std::vector<rds2cpp::Attribute>& get_attributes() const {
if (!ptr) throw std::runtime_error("Null pointer in get_attributes");
switch (ptr->type()) {
case rds2cpp::SEXPType::INT: return static_cast<const rds2cpp::IntegerVector*>(ptr)->attributes;
Expand All @@ -142,18 +157,18 @@ class RdsReader {

class RdsObject {
private:
std::unique_ptr<rds2cpp::Parsed> parsed;
std::unique_ptr<rds2cpp::RdsFile> parsed;
std::unique_ptr<RdsReader> reader;

public:
RdsObject(const std::string& file) {
try {
rds2cpp::ParseRdsOptions options;
parsed = std::make_unique<rds2cpp::Parsed>(rds2cpp::parse_rds(file, options));
parsed = std::make_unique<rds2cpp::RdsFile>(rds2cpp::parse_rds(file, options));
if (!parsed || !parsed->object) {
throw std::runtime_error("Failed to parse RDS file");
}
reader = std::make_unique<RdsReader>(parsed->object.get());
reader = std::make_unique<RdsReader>(parsed->object.get(), &parsed->symbols);
} catch (const std::exception& e) {
throw std::runtime_error(std::string("Error in 'RdsObject' constructor: ") + e.what());
}
Expand Down Expand Up @@ -181,38 +196,31 @@ class RdaObject {

py::list get_object_names() const {
if (!parsed) throw std::runtime_error("Null parsed in 'get_object_names'");
const auto& pairlist = parsed->contents;
py::list names;
for (size_t i = 0; i < pairlist.tag_names.size(); ++i) {
if (pairlist.has_tag[i]) {
names.append(pairlist.tag_names[i]);
} else {
names.append(py::none());
}
for (const auto& obj : parsed->objects) {
names.append(parsed->symbols[obj.name.index].name);
}
return names;
}

int get_object_count() const {
if (!parsed) throw std::runtime_error("Null parsed in 'get_object_count'");
return static_cast<int>(parsed->contents.data.size());
return static_cast<int>(parsed->objects.size());
}

RdsReader* get_object_by_index(int index) const {
if (!parsed) throw std::runtime_error("Null parsed in 'get_object_by_index'");
const auto& data = parsed->contents.data;
if (index < 0 || static_cast<size_t>(index) >= data.size()) {
if (index < 0 || static_cast<size_t>(index) >= parsed->objects.size()) {
throw std::out_of_range("Object index out of range");
}
return new RdsReader(data[index].get());
return new RdsReader(parsed->objects[index].value.get(), &parsed->symbols);
}

RdsReader* get_object_by_name(const std::string& name) const {
if (!parsed) throw std::runtime_error("Null parsed in 'get_object_by_name'");
const auto& pairlist = parsed->contents;
for (size_t i = 0; i < pairlist.tag_names.size(); ++i) {
if (pairlist.has_tag[i] && pairlist.tag_names[i] == name) {
return new RdsReader(pairlist.data[i].get());
for (const auto& obj : parsed->objects) {
if (parsed->symbols[obj.name.index].name == name) {
return new RdsReader(obj.value.get(), &parsed->symbols);
}
}
throw std::runtime_error("Object not found: " + name);
Expand All @@ -234,7 +242,6 @@ PYBIND11_MODULE(lib_rds_parser, m) {
.def("get_object_by_name", &RdaObject::get_object_by_name, py::return_value_policy::take_ownership, py::keep_alive<0, 1>());

py::class_<RdsReader>(m, "RdsReader")
.def(py::init<const rds2cpp::RObject*>())
.def("get_rtype", &RdsReader::get_rtype)
.def("get_rsize", &RdsReader::get_rsize)
.def("get_numeric_data", &RdsReader::get_numeric_data)
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ project_urls =
# Twitter = https://twitter.com/PyScaffold

# Change if running only on Windows, Mac or Linux (comma-separated)
platforms = Mac, Linux
platforms = Mac, Linux, Windows

# Add here all kinds of additional classifiers as defined under
# https://pypi.org/classifiers/
Expand Down
54 changes: 34 additions & 20 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from setuptools import setup, Extension
from setuptools.command.build_ext import build_ext as build_ext_orig
import glob
import pathlib
import os
import shutil
Expand All @@ -33,23 +34,26 @@ def build_cmake(self, ext):
outpath = os.path.join(build_lib.absolute(), ext.name)

build_temp = os.path.join(build_temp, "build")
if not os.path.exists(build_temp):
cmd = [
"cmake",
"-S",
"lib",
"-B",
build_temp,
"-Dpybind11_DIR=" + os.path.join(os.path.dirname(pybind11.__file__), "share", "cmake", "pybind11"),
"-DPYTHON_EXECUTABLE=" + sys.executable,
]
if os.name != "nt":
cmd.append("-DCMAKE_BUILD_TYPE=Release")
cmd.append("-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=" + outpath)
# Remove stale build dir to avoid FetchContent conflicts with
# leftover _deps from a previous pip build environment.
if os.path.exists(build_temp):
shutil.rmtree(build_temp, ignore_errors=True)
cmd = [
"cmake",
"-S",
"lib",
"-B",
build_temp,
"-Dpybind11_DIR=" + os.path.join(os.path.dirname(pybind11.__file__), "share", "cmake", "pybind11"),
"-DPYTHON_EXECUTABLE=" + sys.executable,
]
if os.name != "nt":
cmd.append("-DCMAKE_BUILD_TYPE=Release")
cmd.append("-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=" + outpath)

if "MORE_CMAKE_OPTIONS" in os.environ:
cmd += os.environ["MORE_CMAKE_OPTIONS"].split()
self.spawn(cmd)
if "MORE_CMAKE_OPTIONS" in os.environ:
cmd += os.environ["MORE_CMAKE_OPTIONS"].split()
self.spawn(cmd)

if not self.dry_run:
cmd = ["cmake", "--build", build_temp]
Expand All @@ -59,10 +63,20 @@ def build_cmake(self, ext):
if os.name == "nt":
# Gave up trying to get MSVC to respect the output directory.
# Delvewheel also needs it to have a 'pyd' suffix... whatever.
shutil.copyfile(
os.path.join(build_temp, "Release", "_core.dll"),
os.path.join(outpath, "_core.pyd"),
)
# The CMake target name is lib_rds_parser; MSVC puts it under Release/.
# pybind11 may add an ABI tag (e.g. lib_rds_parser.cp312-win_amd64.pyd).
release_dir = os.path.join(build_temp, "Release")
candidates = glob.glob(os.path.join(release_dir, "lib_rds_parser*"))
if not candidates:
raise RuntimeError(
f"Cannot find compiled library in {release_dir}. "
f"Contents: {os.listdir(release_dir) if os.path.isdir(release_dir) else 'dir not found'}"
)
# Prefer .pyd files over .lib/.exp
pyd_files = [c for c in candidates if c.endswith(".pyd")]
src_name = pyd_files[0] if pyd_files else candidates[0]
os.makedirs(outpath, exist_ok=True)
shutil.copyfile(src_name, os.path.join(outpath, "lib_rds_parser.pyd"))


if __name__ == "__main__":
Expand Down
Loading