diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 860eb0729..6ff3a5082 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -78,3 +78,6 @@ jobs: - name: Validate schemas run: make schemas + + - name: Validate that all properties have schemas + run: make audit_property_definitions diff --git a/optimade.rst b/optimade.rst index 7cf230f70..960556b96 100644 --- a/optimade.rst +++ b/optimade.rst @@ -69,6 +69,8 @@ OPTIMADE API specification v1.4.0~develop .. role:: type(literal) +.. role:: property-example(literal) + .. role:: property-fail(literal) .. role:: endpoint(literal) @@ -1321,7 +1323,7 @@ The API implementation MAY provide other entry types than the ones standardized Such entry types MUST be prefixed by a database-provider-specific prefix (i.e., the resource objects' :property:`type` value should start with the database-provider-specific prefix, e.g., :property:`type` = :val:`_exmpl_workflows`). Each custom entry type SHOULD be served at a corresponding entry listing endpoint under the versioned or unversioned base URL that serves the API with the same name (i.e., equal to the resource objects' :property:`type` value, e.g., :endpoint:`/_exmpl_workflows`). It is RECOMMENDED to align with the OPTIMADE API specification practice of using a plural for entry resource types and entry type endpoints. -Any custom entry listing endpoint MUST also be added to the :property:`available_endpoints` and :property:`entry_types_by_format` attributes of the `Base Info Endpoint`_. +Any custom entry listing endpoint MUST also be added to the :field:`available_endpoints` and :field:`entry_types_by_format` attributes of the `Base Info Endpoint`_. For more on custom endpoints, see `Custom Extension Endpoints`_. @@ -1870,9 +1872,9 @@ The links endpoint MUST be provided under the versioned or unversioned base URL Link Types ~~~~~~~~~~ -Each link has a :property:`link_type` attribute that specifies the type of the linked relation. 
+Each link has a :field:`link_type` attribute that specifies the type of the linked relation. -The :property:`link_type` MUST be one of the following values: +The :field:`link_type` MUST be one of the following values: - :field-val:`child`: a link to another OPTIMADE implementation that MUST be within the same provider. This allows the creation of a tree-like structure of databases by pointing to children sub-databases. @@ -1914,17 +1916,17 @@ The resource objects' response dictionaries MUST include the following fields: - **link\_type**: a string containing the link type. It MUST be one of the values listed above in section `Link Types`_. - - **aggregate**: a string indicating whether a client that is following links to aggregate results from different OPTIMADE implementations should follow this link or not. This flag SHOULD NOT be indicated for links where :property:`link_type` is not :val:`child`. + - **aggregate**: a string indicating whether a client that is following links to aggregate results from different OPTIMADE implementations should follow this link or not. This flag SHOULD NOT be indicated for links where :field:`link_type` is not :val:`child`. If not specified, clients MAY assume that the value is :val:`ok`. If specified, and the value is anything different than :val:`ok`, the client MUST assume that the server is suggesting not to follow the link during aggregation by default (also if the value is not among the known ones, in case a future specification adds new accepted values). Specific values indicate the reason why the server is providing the suggestion. - A client MAY follow the link anyway if it has reason to do so (e.g., if the client is looking for all test databases, it MAY follow the links where :property:`aggregate` has value :val:`test`). + A client MAY follow the link anyway if it has reason to do so (e.g., if the client is looking for all test databases, it MAY follow the links where :field:`aggregate` has value :val:`test`). 
If specified, it MUST be one of the values listed in section `Link Aggregate Options`_. - - **no_aggregate_reason**: an OPTIONAL human-readable string indicating the reason for suggesting not to aggregate results following the link. It SHOULD NOT be present if :property:`aggregate` has value :val:`ok`. + - **no_aggregate_reason**: an OPTIONAL human-readable string indicating the reason for suggesting not to aggregate results following the link. It SHOULD NOT be present if :field:`aggregate` has value :val:`ok`. Example: @@ -2023,11 +2025,11 @@ Example: Internal Links: Root and Child Links ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Any number of resource objects with :property:`link_type` equal to :val:`child` MAY be present as part of the :field:`data` list. +Any number of resource objects with :field:`link_type` equal to :val:`child` MAY be present as part of the :field:`data` list. A :val:`child` object represents a "link" to an OPTIMADE implementation within the same provider exactly one layer **below** the current implementation's layer. -Exactly one resource object with :property:`link_type` equal to :val:`root` MUST be present as part of the :field:`data` list. -Note: the same implementation may of course be linked by other implementations via a :endpoint:`/links` endpoint with :property:`link_type` equal to :val:`external`. +Exactly one resource object with :field:`link_type` equal to :val:`root` MUST be present as part of the :field:`data` list. +Note: the same implementation may of course be linked by other implementations via a :endpoint:`/links` endpoint with :field:`link_type` equal to :val:`external`. The :val:`root` resource object represents a link to the topmost OPTIMADE implementation of the current provider. By following :val:`child` links from the :val:`root` object recursively, it MUST be possible to reach the current OPTIMADE implementation. 
@@ -2038,7 +2040,7 @@ In practice, this forms a tree structure for the OPTIMADE implementations of a p List of Providers Links ~~~~~~~~~~~~~~~~~~~~~~~ -Resource objects with :property:`link_type` equal to :val:`providers` MUST point to an `Index Meta-Database`_ that supplies a list of OPTIMADE database providers. +Resource objects with :field:`link_type` equal to :val:`providers` MUST point to an `Index Meta-Database`_ that supplies a list of OPTIMADE database providers. The intention is to be able to auto-discover all providers of OPTIMADE implementations. A list of known database providers can be retrieved as described in section `Namespace Prefixes`_. @@ -2054,12 +2056,12 @@ This will make all OPTIMADE databases and implementations by the provider discov Link Aggregate Options ~~~~~~~~~~~~~~~~~~~~~~ -If specified, the :property:`aggregate` attributed MUST have one of the following values: +If specified, the :field:`aggregate` attribute MUST have one of the following values: - :val:`ok` (default value, if unspecified): it is ok to follow this link when aggregating OPTIMADE results. - :val:`test`: the linked database is a test database, whose content might not be correct or might not represent physically-meaningful data. Therefore by default the link should not be followed. - :val:`staging`: the linked database is almost production-ready, but final checks on its content are being performed, so the content might still contain errors. Therefore by default the link should not be followed. -- :val:`no`: any other reason to suggest not to follow the link during aggregation of OPTIMADE results. The implementation MAY provide mode details in a human-readable form via the attribute :property:`no-aggregate-reason`. +- :val:`no`: any other reason to suggest not to follow the link during aggregation of OPTIMADE results. The implementation MAY provide more details in a human-readable form via the attribute :field:`no_aggregate_reason`. 
Custom Extension Endpoints -------------------------- @@ -2100,9 +2102,9 @@ The following tokens are used in the filter query component: Examples of valid property names: - - :property:`band_gap` - - :property:`cell_length_a` - - :property:`cell_volume` + - :property-example:`band_gap` + - :property-example:`cell_length_a` + - :property-example:`cell_volume` Examples of incorrect property names: @@ -4389,7 +4391,7 @@ checksums - **Support**: OPTIONAL support in implementations, i.e., MAY be :val:`null`. - **Query**: Support for queries on this property is OPTIONAL. - - Supported dictionary keys: :property:`md5`, :property:`sha1`, :property:`sha224`, :property:`sha256`, :property:`sha384`, :property:`sha512`. + - Supported dictionary keys: :field:`md5`, :field:`sha1`, :field:`sha224`, :field:`sha256`, :field:`sha384`, :field:`sha512`. Checksums outside this list MAY be used, but their names MUST be prefixed by a database-provider-specific namespace prefix (see appendix `Namespace Prefixes`_). atime diff --git a/tests/makefiles/Makelocal-audit b/tests/makefiles/Makelocal-audit index ed0982a42..4f9ca12cc 100644 --- a/tests/makefiles/Makelocal-audit +++ b/tests/makefiles/Makelocal-audit @@ -4,7 +4,7 @@ .PHONY: audit -audit: audit_grammars audit_authors audit_spelling audit_schemas +audit: audit_grammars audit_authors audit_spelling audit_schemas audit_property_definitions @echo "==============================================" @echo "Audit of specification completed successfully." 
@echo "==============================================" @@ -35,3 +35,9 @@ audit_spelling: audit_schemas: $(MAKE) validate_schemas + + +.PHONY: audit_property_definitions + +audit_property_definitions: + tests/scripts/check_property_definitions.sh
diff --git a/tests/scripts/check_property_definitions.sh b/tests/scripts/check_property_definitions.sh
new file mode 100755
index 000000000..5bd51388a
--- /dev/null
+++ b/tests/scripts/check_property_definitions.sh
@@ -0,0 +1,162 @@
+#!/bin/bash
+# Script to verify that all properties in the specification have corresponding schema definitions
+# Compares fields extracted from optimade.rst with schema definition files
+#
+# Usage: tests/scripts/check_property_definitions.sh (run from the repository root)
+# Requires: jq and tests/scripts/extract_entry_fields.sh
+# Exit status: 1 if a specified property has no schema file or the field list cannot
+#              be extracted, 0 otherwise. Schema files without a specification entry
+#              only produce warnings.
+
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[0;33m'
+NC='\033[0m' # No Color
+
+# Without jq every query below would fail and the audit would pass vacuously
+if ! command -v jq > /dev/null 2>&1; then
+    echo "Error: jq is required but was not found in PATH" >&2
+    exit 1
+fi
+
+# Extract fields from specification (JSON object: entry type -> list of property names)
+if ! spec_fields=$(tests/scripts/extract_entry_fields.sh); then
+    echo "Error: tests/scripts/extract_entry_fields.sh failed" >&2
+    exit 1
+fi
+
+# Get the entry types from the JSON output; an empty list means nothing would be checked
+if ! entry_types=$(jq -r 'keys[]' <<< "$spec_fields") || [[ -z "$entry_types" ]]; then
+    echo "Error: no entry types could be extracted from the specification" >&2
+    exit 1
+fi
+
+# Track errors
+errors=0
+warnings=0
+
+echo "Checking property definitions against specification..."
+echo ""
+
+# Check whether a schema definition file exists for a property.
+# Arguments: $1 - entry type, $2 - property name, $3 - schema version (default: v1.2)
+# Returns:   0 if the file exists in the entry type directory of the given version,
+#            in the v1.3 entry type directory, or among the core properties; 1 otherwise
+check_property() {
+    local entry_type=$1
+    local property=$2
+    local version=${3:-v1.2} # Default to v1.2
+    local defs="schemas/src/defs"
+
+    [[ -f "$defs/$version/properties/optimade/$entry_type/$property.yaml" ]] ||
+        [[ -f "$defs/v1.3/properties/optimade/$entry_type/$property.yaml" ]] ||
+        [[ -f "$defs/$version/properties/core/$property.yaml" ]]
+}
+
+for entry_type in $entry_types; do
+    echo "Checking $entry_type..."
+
+    # Special handling for _common (these should be in core/)
+    if [[ "$entry_type" == "_common" ]]; then
+        properties=$(jq -r '.["_common"][]' <<< "$spec_fields")
+        for property in $properties; do
+            # Common properties should exist in core/ or be shared
+            if [[ -f "schemas/src/defs/v1.2/properties/core/$property.yaml" ]]; then
+                echo -e " ${GREEN}✓${NC} $property (core)"
+            else
+                # Check if it exists in any entry type
+                found=false
+                for et in $entry_types; do
+                    if check_property "$et" "$property"; then
+                        echo -e " ${GREEN}✓${NC} $property (in $et)"
+                        found=true
+                        break
+                    fi
+                done
+                if [[ "$found" == "false" ]]; then
+                    echo -e " ${RED}✗${NC} $property - MISSING"
+                    errors=$((errors + 1))
+                fi
+            fi
+        done
+    else
+        properties=$(jq -r --arg et "$entry_type" '.[$et][]' <<< "$spec_fields")
+
+        # Skip if no properties (empty array) - this is OK for some entry types like calculations
+        if [[ -z "$properties" ]]; then
+            echo -e " No specific properties with ~~~~ underlines (uses common properties only)"
+            continue
+        fi
+
+        # check_property already covers the v1.2 references directory, so references
+        # need no separate fallback here
+        for property in $properties; do
+            if check_property "$entry_type" "$property"; then
+                echo -e " ${GREEN}✓${NC} $property"
+            else
+                echo -e " ${RED}✗${NC} $property - MISSING"
+                errors=$((errors + 1))
+            fi
+        done
+    fi
+    echo ""
+done
+
+# Also check for orphaned schema files (schemas without spec entries)
+echo "Checking for properties defined in schemas but not in specification..."
+echo ""
+
+# Common properties are valid for every entry type
+common_props=$(jq -r '._common[]?' <<< "$spec_fields" 2> /dev/null)
+
+for entry_type in structures files references calculations trajectories; do
+    schema_dir="schemas/src/defs/v1.2/properties/optimade/$entry_type"
+
+    if [[ ! -d "$schema_dir" ]]; then
+        # Check v1.3
+        schema_dir="schemas/src/defs/v1.3/properties/optimade/$entry_type"
+        if [[ ! -d "$schema_dir" ]]; then
+            continue
+        fi
+    fi
+
+    # Get properties from spec for this entry type
+    spec_props=$(jq -r --arg et "$entry_type" '.[$et][]?' <<< "$spec_fields" 2> /dev/null)
+    all_spec_props=$(printf '%s\n%s\n' "$spec_props" "$common_props" | sort -u)
+
+    # Find all schema files
+    for schema_file in "$schema_dir"/*.yaml; do
+        if [[ ! -f "$schema_file" ]]; then
+            continue
+        fi
+
+        property=$(basename "$schema_file" .yaml)
+
+        # Exact, literal line match: the property name is not treated as a regex
+        if grep -qxF -- "$property" <<< "$all_spec_props"; then
+            continue
+        fi
+
+        # References properties might not be in spec with ~~~~ but still valid
+        if [[ "$entry_type" == "references" ]]; then
+            continue
+        fi
+
+        echo -e " ${YELLOW}⚠${NC} $entry_type/$property - Schema exists but not in specification (or uses different formatting)"
+        warnings=$((warnings + 1))
+    done
+done
+
+echo ""
+echo "=============================================="
+echo "Summary:"
+echo " Errors: $errors"
+echo " Warnings: $warnings"
+echo "=============================================="
+
+if [[ $errors -gt 0 ]]; then
+    exit 1
+else
+    exit 0
+fi
diff --git a/tests/scripts/extract_entry_fields.sh b/tests/scripts/extract_entry_fields.sh
new file mode 100755
index 000000000..80fb6ac83
--- /dev/null
+++ b/tests/scripts/extract_entry_fields.sh
@@ -0,0 +1,93 @@
+#!/bin/bash
+# Script to extract field names from optimade.rst Entry List section
+# Extracts properties with ~~~~ underlines and groups by entry type
+
+input_file="optimade.rst"
+
+if [[ ! -f "$input_file" ]]; then
+    echo "Error: $input_file not found" >&2
+    exit 1
+fi
+
+# Print a JSON object mapping "_common" and each entry type to its list of property names
+awk '
+BEGIN {
+    in_entry_list = 0   # 1 while inside the top-level "Entry List" section
+    in_section = 0      # 1 once a JSON array has been opened
+    prev_line = ""      # previous input line (candidate heading text)
+    print "{"
+    first_section = 1
+}
+
+# Detect "Entry List" section start
+/^Entry List$/ {
+    getline  # Read the ===== line so the rule below does not see it
+    in_entry_list = 1
+    next
+}
+
+# Exit Entry List section when we hit another major section (with ======)
+in_entry_list && /^=+$/ && prev_line !~ /^Entry List$/ {
+    in_entry_list = 0
+}
+
+# Detect "Properties Used by Multiple Entry Types"
+in_entry_list && /^Properties Used by Multiple Entry Types$/ {
+    if (in_section) {
+        print ""
+        print " ],"
+    } else if (!first_section) {
+        print ","
+    }
+    print " \"_common\": ["
+    in_section = 1
+    first_section = 0
+    first_field = 1
+    next
+}
+
+# Detect entry type sections (e.g., "Structures Entries")
+in_entry_list && /^[A-Z][a-z]+ Entries$/ {
+    if (in_section) {
+        print ""
+        print " ],"
+    } else if (!first_section) {
+        print ","
+    }
+    # Extract entry type name and convert to lowercase
+    entry_type = tolower($1)
+    printf " \"%s\": [\n", entry_type
+    in_section = 1
+    first_section = 0
+    first_field = 1
+    next
+}
+
+# Detect field names (lines with ~~~~ underneath).
+# in_entry_list is required so that ~~~~ headings located after the end of the
+# "Entry List" section are not appended to the last entry type.
+in_entry_list && in_section && /^~+$/ && prev_line ~ /^[a-z_][a-z_0-9\\]*$/ {
+    # prev_line contains the field name
+    if (!first_field) {
+        print ","
+    }
+    # Handle escaped underscores in field names
+    field = prev_line
+    gsub(/\\/, "", field)  # Remove backslashes
+    printf " \"%s\"", field
+    first_field = 0
+}
+
+# Store current line for next iteration
+{
+    prev_line = $0
+}
+
+END {
+    if (in_section) {
+        print ""
+        print " ]"
+    }
+    print "}"
+}
+' "$input_file"