Skip to content
Closed
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
[workspace]
members = [
"acceptance",
"derive-macros",
"ffi",
"kernel",
"kernel/examples/*",
Expand Down
20 changes: 20 additions & 0 deletions derive-macros/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Manifest for the crate that hosts the `Schema` derive macro.
# Every package metadata key below is inherited from the workspace root.
[package]
name = "derive-macros"
authors.workspace = true
edition.workspace = true
homepage.workspace = true
keywords.workspace = true
license.workspace = true
repository.workspace = true
readme.workspace = true
version.workspace = true

[lib]
# Build this library as a procedural-macro crate so it can export `#[proc_macro_derive]` items.
proc-macro = true

[dependencies]
proc-macro2 = "1"
# `extra-traits` provides `Debug` for syn's syntax-tree types; the macro formats a
# `syn::Type` with `{:?}` when it panics on an unsupported field type.
syn = { version = "2.0", features = ["extra-traits"] }
quote = "1.0"


88 changes: 88 additions & 0 deletions derive-macros/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
use proc_macro2::{Ident, TokenStream};
use quote::{quote, quote_spanned};
use syn::spanned::Spanned;
use syn::{parse_macro_input, Data, DataStruct, DeriveInput, Fields, PathArguments, Type};

#[proc_macro_derive(Schema, attributes(schema))]
pub fn derive_schema(input: proc_macro::TokenStream) -> proc_macro::TokenStream {
Comment thread
nicklan marked this conversation as resolved.
let input = parse_macro_input!(input as DeriveInput);
let struct_ident = input.ident;

let schema_fields = gen_schema_fields(&input.data);
let output = quote! {
impl crate::actions::schemas::GetField for #struct_ident {
fn get_field(name: impl Into<String>) -> crate::schema::StructField {
use crate::actions::schemas::GetField;
crate::schema::StructField::new(
name,
crate::schema::StructType::new(vec![
#schema_fields
]),
// TODO: Ensure correct. By default not nullable, only can be made nullable by
// being wrapped in an Option
false,
)
}
}
};
proc_macro::TokenStream::from(output)
}

// turn a struct field's name into its schema name, converting snake_case to camelCase
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know where to put the doc comment, but somewhere we should be careful to explain that the actual field names are all mandated by Delta spec, and so the user of this macro is responsible to ensure that e.g. Metadata::schema_string is the snake-case-ified version of schemaString from Delta's Change Metadata action, in order to keep rust happy. This macro is written with the assumption that it merely undoes that (previously correctly performed) transformation.

The same explains why it's ok to use to_ascii_uppercase below -- all Delta field names are plain ASCII.

/// Converts a snake_case identifier into its camelCase schema name.
///
/// Every `_` is dropped and the first character after a run of underscores is upper-cased;
/// all other characters are copied unchanged. The returned identifier keeps the span of
/// `name`.
///
/// This only undoes a snake_case conversion that is expected to have been applied by hand:
/// schema field names are dictated by the Delta spec, so the Rust field must be the
/// snake_case form of the spec's camelCase name (e.g. `schema_string` for `schemaString`).
/// Those names are plain ASCII, which is why `to_ascii_uppercase` is sufficient here.
fn get_schema_name(name: &Ident) -> Ident {
    let snake = name.to_string();
    let mut camel = String::with_capacity(snake.len());
    let mut capitalize_next = false;

    for ch in snake.chars() {
        match ch {
            // Underscores never reach the output; they only flag the next character.
            '_' => capitalize_next = true,
            _ if capitalize_next => {
                capitalize_next = false;
                // This assumes we're using ascii, should be okay
                camel.push(ch.to_ascii_uppercase());
            }
            _ => camel.push(ch),
        }
    }

    Ident::new(&camel, name.span())
}

/// Builds the comma-separated list of `get_field` calls, one per field of the struct.
///
/// For a field `some_name: path::to::Ty<Args>` the emitted expression is
/// `<path::to::Ty<Args>>::get_field(stringify!(someName))`, where the schema name comes from
/// `get_schema_name`. The calls are meant to be spliced into a `vec![...]`.
///
/// # Panics
///
/// Panics (surfacing as a compile error at the derive site) when `data` is not a struct with
/// named fields, or when a field's type is not a plain type path.
fn gen_schema_fields(data: &Data) -> TokenStream {
    let fields = match data {
        Data::Struct(DataStruct {
            fields: Fields::Named(fields),
            ..
        }) => &fields.named,
        _ => panic!("this derive macro only works on structs with named fields"),
    };

    let schema_fields = fields.iter().map(|field| {
        let name = field.ident.as_ref().unwrap(); // we know these are named fields
        let name = get_schema_name(name);
        match field.ty {
            Type::Path(ref type_path) => {
                let last_segment = type_path
                    .path
                    .segments
                    .last()
                    .expect("Couldn't get type");
                // `Fn(A) -> B` style arguments cannot name a type that has a `get_field`
                // function; reject them here instead of emitting code that cannot compile.
                if let PathArguments::Parenthesized(_) = last_segment.arguments {
                    panic!("Can't handle type: {:?}", field.ty);
                }
                // Emit the type exactly as the user wrote it, qualifiers included. The macro
                // only sees unresolved tokens, so emitting just the last path segment would
                // break for any type that is not imported under its bare name. The `<Type>::`
                // form also accepts generic arguments without needing a turbofish.
                quote_spanned! {field.span()=>
                    <#type_path>::get_field(stringify!(#name))
                }
            }
            _ => {
                panic!("Can't handle type: {:?}", field.ty);
            }
        }
    });
    quote! { #(#schema_fields),* }
}
3 changes: 3 additions & 0 deletions kernel/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ url = "2"
uuid = "1.3.0"
z85 = "3.0.5"

# bring in our derive macros
derive-macros = { path = "../derive-macros" }

# used for developer-visibility
visibility = "0.1.0"

Expand Down
4 changes: 3 additions & 1 deletion kernel/src/actions/deletion_vector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@ use std::io::{Cursor, Read};
use std::sync::Arc;

use bytes::Bytes;
use derive_macros::Schema;
use roaring::RoaringTreemap;
use url::Url;

use crate::{DeltaResult, Error, FileSystemClient};

#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Schema)]
#[schema(name = deletionVector)]
pub struct DeletionVectorDescriptor {
/// A single character to indicate how to access the DV. Legal options are: ['u', 'i', 'p'].
pub storage_type: String,
Expand Down
Loading