diff --git a/mkdocs.yml b/mkdocs.yml index c81a99bc26..de7ed1704e 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -47,6 +47,7 @@ nav: - Coordinate systems: appendices/coordinate-systems.md - Quantitative MRI: appendices/qmri.md - Arterial Spin Labeling: appendices/arterial-spin-labeling.md + - Media files: appendices/media-files.md - Cross modality correspondence: appendices/cross-modality-correspondence.md - Changelog: CHANGES.md - The BIDS Website: diff --git a/src/appendices/media-files.md b/src/appendices/media-files.md new file mode 100644 index 0000000000..62759f3dfb --- /dev/null +++ b/src/appendices/media-files.md @@ -0,0 +1,165 @@ +# Media Files + +## Introduction + +Several BIDS datatypes make use of media files — audio recordings, video recordings, +combined audio-video recordings, and still images. +This appendix defines the common file formats, metadata conventions, +and codec identification schemes shared across all datatypes that use media files. + +The following media suffixes are defined: + +{{ MACROS___make_suffix_table(["audio", "video", "audiovideo", "image"]) }} + +Datatypes that incorporate media files (for example, behavioral recordings or stimuli) +define their own file-naming rules, directory placement, and datatype-specific metadata. +The conventions described here apply uniformly to all such datatypes. + +### Relationship to the `photo` suffix + +The media file definitions introduced here generalize the concept of all media in BIDS. +The existing `photo` suffix (used for photographs of anatomical landmarks, +head localization coils, and tissue samples) predates this framework and covers +a narrower use case — still images in specific electrophysiology and microscopy datatypes. + +The media suffixes (`audio`, `video`, `audiovideo`, `image`) are intended as the +general-purpose mechanism for all media content in BIDS. 
+The media file framework should be generally adopted for new datatypes, +and a future proposal may deprecate the `photo` suffix in favor of the broader `image` +suffix with appropriate migration tooling +(see [bids-utils](https://github.com/bids-standard/bids-utils)). + +## Supported Formats + +### Audio formats + +{{ MACROS___make_extension_table(["wav", "mp3", "aac", "ogg"]) }} + +### Video container formats + +{{ MACROS___make_extension_table(["mp4", "avi", "mkv", "webm"]) }} + +### Image formats + +{{ MACROS___make_extension_table(["jpg", "png", "svg", "webp", "tif", "tiff"]) }} + +When choosing a format, consider the trade-off between file size, data fidelity, openness and prevalence of the format in the domain of application. +Uncompressed or lossless formats (WAV, PNG, TIFF) preserve full quality +but produce larger files. +Lossy formats (MP3, AAC, JPEG) significantly reduce file size +at the cost of some data loss. + +## Media Stream Metadata + +Media files SHOULD be accompanied by a JSON sidecar file +containing technical metadata about the media streams. +The following metadata fields are defined for media files. + +### Duration + +Applies to suffixes: `audio`, `video`, `audiovideo`. + +{{ MACROS___make_sidecar_table("media.MediaDuration") }} + +`RecordingDuration` reuses the existing BIDS metadata field already defined for +electrophysiology recordings (EEG, iEEG, MEG, and others). + +### Audio stream properties + +Applies to suffixes: `audio`, `audiovideo`. + +{{ MACROS___make_sidecar_table("media.MediaAudioProperties") }} + +Note: `AudioSampleRate` is used instead of the existing `SamplingFrequency` field +because audio-video files require distinguishing the audio sampling rate from the +video frame rate. The `Audio` prefix makes this unambiguous in multi-stream containers. + +### Image properties + +Applies to suffixes: `video`, `audiovideo`, `image`. 
+ +{{ MACROS___make_sidecar_table("media.MediaImageProperties") }} + +### Video stream properties + +Applies to suffixes: `video`, `audiovideo`. + +{{ MACROS___make_sidecar_table("media.MediaVideoProperties") }} + +## Codec Identification + +Codec identification uses two complementary naming systems: + +### FFmpeg codec names (RECOMMENDED) + +The `AudioCodec` and `VideoCodec` fields use +[FFmpeg codec names](https://www.ffmpeg.org/ffmpeg-codecs.html) as the RECOMMENDED +convention. These names are the de facto standard in scientific computing and can be +auto-extracted from media files using: + +```bash +ffprobe -v quiet -print_format json -show_streams recording.mp4 +``` + +### RFC 6381 codec strings (OPTIONAL) + +The `AudioCodecRFC6381` and `VideoCodecRFC6381` fields use +[RFC 6381](https://datatracker.ietf.org/doc/html/rfc6381) codec strings. +These provide precise codec profile and level information useful for +web and broadcast interoperability. + +### Common codec reference + +| Codec | FFmpeg Name | RFC 6381 String | Notes | +| -------------- | ----------- | ------------------ | ----------------------- | +| H.264 / AVC | `h264` | `avc1.640028` | Most widely supported | +| H.265 / HEVC | `hevc` | `hev1.1.6.L93.B0` | High efficiency | +| VP9 | `vp9` | `vp09.00.10.08` | Open, royalty-free | +| AV1 | `av1` | `av01.0.01M.08` | Next-gen open codec | +| AAC-LC | `aac` | `mp4a.40.2` | Default audio for MP4 | +| MP3 | `mp3` | `mp4a.6B` | Legacy lossy audio | +| Opus | `opus` | `Opus` | Open, low-latency audio | +| FLAC | `flac` | `fLaC` | Open lossless audio | +| PCM 16-bit LE | `pcm_s16le` | — | Uncompressed (WAV) | + +The FFmpeg name column shows the value to use for `VideoCodec` or `AudioCodec`. +The RFC 6381 column shows the value for `VideoCodecRFC6381` or `AudioCodecRFC6381`. +RFC 6381 strings vary by profile and level; +the values shown are representative examples. 
+ +## Privacy Considerations + +Media files — particularly audio and video recordings — may contain +personally identifiable information (PII), including but not limited to: + +- Voices and speech content +- Facial features and other physical characteristics +- Background environments that could identify locations +- Metadata embedded in file headers (for example, GPS coordinates, device identifiers) + +Researchers MUST ensure that sharing of media files complies with the +informed consent obtained from participants and with applicable privacy regulations. +De-identification techniques (for example, voice distortion, face blurring, +metadata stripping) SHOULD be applied where appropriate before data sharing. + +## Example + +A complete sidecar JSON file for an audio-video recording: + +```json +{ + "RecordingDuration": 312.5, + "VideoCodec": "h264", + "VideoCodecRFC6381": "avc1.640028", + "VideoFrameRate": 30, + "VideoFrameCount": 9375, + "ImageWidth": 1920, + "ImageHeight": 1080, + "ImagePixelFormat": "yuv420p", + "ImageBitDepth": 8, + "AudioCodec": "aac", + "AudioCodecRFC6381": "mp4a.40.2", + "AudioSampleRate": 48000, + "AudioChannelCount": 2 +} +``` diff --git a/src/modality-specific-files/behavioral-experiments.md b/src/modality-specific-files/behavioral-experiments.md index 81af91e65c..59b9e80a73 100644 --- a/src/modality-specific-files/behavioral-experiments.md +++ b/src/modality-specific-files/behavioral-experiments.md @@ -1,4 +1,4 @@ -# Behavioral experiments (with no neural recordings) +# Behavioral recordings !!! example "Example datasets" @@ -15,19 +15,14 @@ and a guide for using macros can be found at --> {{ MACROS___make_filename_template("raw", datatypes=["beh"]) }} -In addition to logs from behavioral experiments -performed alongside imaging data acquisitions, -one MAY also include data from experiments -performed with no neural recordings. 
-The results of those experiments MAY be stored in the `beh` directory -using the same formats for event timing (`_events.tsv`), -metadata (`_events.json`), -physiological (`_physio.tsv.gz`, `_physio.json`) -and other continuous recordings (`_stim.tsv.gz`, `_stim.json`) -as for tasks performed during MRI, electrophysiological or other neural recordings. -Additionally, events files -that do not include the mandatory `onset` and `duration` columns -MAY be included, +The `beh` directory MAY store behavioral recordings such as audio (`_audio.*`), video (`_video.*`), combined audio-video (`_audiovideo.*`), and still image (`_image.*`) recordings, physiological (`_physio.*`) recordings, and other continuous recordings (`_stim.tsv.gz`, `_stim.json`). +Audio, video, audio-video, and image recordings MAY capture subjects performing tasks, subjects at rest, or stimuli being presented to the subject. +Audio/video recordings MAY occur simultaneously with other recordings, such as BOLD or EEG. +Relative timing between files MAY be determined by consulting the `scans.tsv` file. +If no `scans.tsv` file is present, the alignment is undefined. +The `beh` directory MAY also contain event timing files (`_events.tsv`) and their associated metadata (`_events.json`) for behavioral experiments that do not have corresponding neuroimaging or functional data. + +Additionally, events files that do not include the mandatory `onset` and `duration` columns MAY be included, but MUST be labeled `_beh.tsv` rather than `_events.tsv`. The following OPTIONAL columns are pre-defined for behavioral data files: @@ -76,6 +71,234 @@ A guide for using macros can be found at --> {{ MACROS___make_sidecar_table("beh.BEHInstitutionInformation") }} +## Audio, video, and audio-video recordings and images + +Audio and video recordings of behaving subjects MAY be stored in the `beh` directory +using the `_audio`, `_video`, and `_audiovideo` suffixes. 
+The `_audio` suffix is for audio-only recordings, `_video` for video-only recordings, +and `_audiovideo` for recordings that contain both audio and video streams. +These recordings are typically used to capture vocalizations, speech, facial expressions, +body movements, or other behavioral aspects during experimental tasks or rest periods. + +Still images captured during behavioral experiments MAY be stored in the `beh` directory +using the `_image` suffix. +These images are typically used for training frames for pose estimation, +snapshots of behavioral setups, or individual frames extracted from video recordings. + +!!! warning "Privacy and personally identifiable information" + + Audio and video recordings and images of human subjects often contain personally identifiable + information (PII) such as faces, voices, and other identifying features. + Data curators MUST take special care to ensure compliance with applicable privacy + regulations (such as HIPAA in the United States, GDPR in the European Union, or other + local data protection laws) when handling these recordings. + + These recordings are generally more suitable for internal use or for sharing + non-human subject data, unless appropriate privacy protections are implemented. 
+ +### File formats + +Audio recordings MUST use one of the following extensions: + +- `.flac` - Free Lossless Audio Codec +- `.mp3` - MPEG Audio Layer III +- `.ogg` - Ogg Vorbis +- `.wav` - Waveform Audio File Format + +Video and audio-video recordings MUST use one of the following extensions: + +- `.mp4` - MPEG-4 Part 14 +- `.mkv` - Matroska video container +- `.avi` - Audio Video Interleave + +Image files MUST use one of the following extensions: + +- `.jpg` - JPEG image +- `.png` - Portable Network Graphics + +### Entities + +Audio and video files MAY use the following entities: + +- `task` - OPTIONAL for audio and video recordings +- `acq` - OPTIONAL, can distinguish different recording setups +- `run` - OPTIONAL, for multiple recordings with identical parameters +- `recording` - OPTIONAL, to differentiate simultaneous recordings from different angles, locations, or devices +- `split` - OPTIONAL, for continuous recordings split into multiple files + +### Examples + + +{{ MACROS___make_filetree_example( + { + "sub-01": { + "beh": { + "sub-01_task-rest_video.mp4": "", + "sub-01_task-rest_video.json": "", + "sub-01_task-interview_audiovideo.mp4": "", + "sub-01_task-interview_audiovideo.json": "", + "sub-01_task-stroop_recording-face_video.mp4": "", + "sub-01_task-stroop_recording-face_video.json": "", + "sub-01_task-stroop_recording-room_video.mp4": "", + "sub-01_task-stroop_recording-room_video.json": "", + "sub-01_task-rest_image.jpg": "", + "sub-01_task-rest_image.json": "", + "sub-01_task-vocalization_audio.wav": "", + "sub-01_task-vocalization_audio.json": "", + }, + }, + } +) }} + +For continuous recordings split into multiple files: + + +{{ MACROS___make_filetree_example( + { + "sub-01": { + "ses-01": { + "beh": { + "sub-01_ses-01_task-freeplay_run-01_split-001_video.mp4": "", + "sub-01_ses-01_task-freeplay_run-01_split-002_video.mp4": "", + "sub-01_ses-01_task-freeplay_run-01_split-003_video.mp4": "", + "sub-01_ses-01_task-freeplay_run-01_video.json": "", 
+ }, + }, + }, + } +) }} + +### Sidecar JSON for audio, video, audio-video recordings, and images + +The following metadata fields are available for audio, video, audio-video recordings, and images: + + +{{ MACROS___make_sidecar_table("beh.AudioVideoImageDevice") }} + +!!! note "Licensing for recordings containing participants" + + Audio, video, and image recordings of participants may have different licensing + restrictions than the main dataset due to privacy considerations. The optional + `License` field can be used to specify different terms for individual recordings + that contain identifiable participant data. If not specified, the recording + inherits the license from `dataset_description.json`. + +{{ MACROS___make_sidecar_table("beh.AudioVideoDuration") }} + +The following fields are available for audio recordings (`_audio`) and audio-video recordings (`_audiovideo`): + +{{ MACROS___make_sidecar_table("beh.AudioStreams") }} + +The following fields are available for video recordings (`_video`) and audio-video recordings (`_audiovideo`): + +{{ MACROS___make_sidecar_table("beh.VideoStreams") }} + +The following fields are available for image files (`_image`): + +{{ MACROS___make_sidecar_table("beh.AudioVideoImageDevice") }} + +{{ MACROS___make_sidecar_table("beh.ImageProperties") }} + +### Example audio-video sidecar JSON + +For an audio-video file containing both video and audio streams: + +```JSON +{ + "TaskName": "RestingState", + "Device": "Sony FDR-AX53", + "AudioChannelCount": 2, + "AudioSampleRate": 48000, + "FrameRate": 30.0, + "Height": 1080, + "Width": 1920, + "Duration": 600.5 +} +``` + +### Example video sidecar JSON + +For a video-only recording: + +```JSON +{ + "TaskName": "RestingState", + "Device": "Sony FDR-AX53", + "FrameRate": 30.0, + "Height": 1080, + "Width": 1920, + "Duration": 600.5 +} +``` + +### Example audio sidecar JSON + +For an audio-only recording: + +```JSON +{ + "TaskName": "Vocalization", + "Device": "Zoom H6 Handy Recorder", + 
"AudioChannelCount": 2, + "AudioSampleRate": 44100, + "Duration": 300.2 +} +``` + +### Example image sidecar JSON + +For a still image: + +```JSON +{ + "TaskName": "Reaching", + "Device": "GoPro Hero 10", + "Height": 1080, + "Width": 1920, + "CameraPosition": "overhead" +} +``` + +### Annotations and events + +Behavioral annotations or event markers for audio and video recordings +SHOULD be stored in accompanying `_events.tsv` files following the standard +[events file format](../modality-agnostic-files/events.md). +These events files use the same filename entities as the audio/video file they describe, +but with the `_events` suffix. + +For example: + + +{{ MACROS___make_filetree_example( + { + "sub-01": { + "beh": { + "sub-01_task-speech_audio.wav": "", + "sub-01_task-speech_audio.json": "", + "sub-01_task-speech_events.tsv": "", + "sub-01_task-speech_events.json": "", + }, + }, + } +) }} + ## Example `_beh.tsv` ```tsv diff --git a/src/schema/objects/extensions.yaml b/src/schema/objects/extensions.yaml index 3c7ef248fa..3032d4f92b 100644 --- a/src/schema/objects/extensions.yaml +++ b/src/schema/objects/extensions.yaml @@ -1,5 +1,11 @@ --- # This file describes valid file extensions in the specification. +aac: + value: .aac + display_name: Advanced Audio Coding + description: | + An [Advanced Audio Coding](https://en.wikipedia.org/wiki/Advanced_Audio_Coding) + audio file. ave: value: .ave display_name: AVE # not sure what ave stands for @@ -7,6 +13,12 @@ ave: File containing data averaged by segments of interest. Used by KIT, Yokogawa, and Ricoh MEG systems. +avi: + value: .avi + display_name: Audio Video Interleave + description: | + An [Audio Video Interleave](https://en.wikipedia.org/wiki/Audio_Video_Interleave) + media container file. bdf: value: .bdf display_name: Biosemi Data Format @@ -114,6 +126,12 @@ fif: display_name: Functional Imaging File Format description: | An MEG file format used by Neuromag, Elekta, and MEGIN. 
+flac: + value: .flac + display_name: Free Lossless Audio Codec + description: | + A [FLAC](https://en.wikipedia.org/wiki/FLAC) audio file. + This format is commonly used for behavioral audio recordings. jpg: value: .jpg display_name: Joint Photographic Experts Group Format @@ -153,6 +171,22 @@ md: display_name: Markdown description: | A Markdown file. +mkv: + value: .mkv + display_name: Matroska Video + description: | + A [Matroska](https://www.matroska.org/) media container file. +mp3: + value: .mp3 + display_name: MP3 Audio + description: | + An [MP3](https://en.wikipedia.org/wiki/MP3) audio file. +mp4: + value: .mp4 + display_name: MPEG-4 Part 14 + description: | + An [MPEG-4 Part 14](https://en.wikipedia.org/wiki/MP4_file_format) + media container file. mefd: value: .mefd/ display_name: Multiscale Electrophysiology File Format Version 3.0 @@ -201,6 +235,12 @@ nwb: A [Neurodata Without Borders](https://nwb-schema.readthedocs.io/en/latest/) file. Each recording consists of a single `.nwb` file. +ogg: + value: .ogg + display_name: Ogg Vorbis + description: | + An [Ogg](https://en.wikipedia.org/wiki/Ogg) audio file, + typically containing Vorbis-encoded audio. OMEBigTiff: value: .ome.btf display_name: Open Microscopy Environment BigTIFF @@ -249,6 +289,11 @@ snirf: display_name: Shared Near Infrared Spectroscopy Format description: | HDF5 file organized according to the [SNIRF specification](https://github.com/fNIRS/snirf) +svg: + value: .svg + display_name: Scalable Vector Graphics + description: | + A [Scalable Vector Graphics](https://en.wikipedia.org/wiki/SVG) image file. sqd: value: .sqd display_name: SQD @@ -263,6 +308,12 @@ tif: display_name: Tag Image File Format description: | A [Tag Image File Format](https://en.wikipedia.org/wiki/TIFF) file. +tiff: + value: .tiff + display_name: Tag Image File Format + description: | + A [Tag Image File Format](https://en.wikipedia.org/wiki/TIFF) image file. + The `.tiff` extension is the long form of `.tif`. 
trg: value: .trg display_name: KRISS TRG @@ -307,6 +358,23 @@ vmrk: A text marker file in the [BrainVision Core Data Format](https://www.brainproducts.com/support-resources/brainvision-core-data-format-1-0/). These files come in three-file sets, including a `.vhdr`, a `.vmrk`, and a `.eeg` file. +wav: + value: .wav + display_name: Waveform Audio + description: | + A [Waveform Audio File Format](https://en.wikipedia.org/wiki/WAV) + audio file, typically containing uncompressed PCM audio. +webm: + value: .webm + display_name: WebM + description: | + A [WebM](https://www.webmproject.org/) media container file, + typically containing VP8/VP9 video and Vorbis/Opus audio. +webp: + value: .webp + display_name: WebP Image + description: | + A [WebP](https://en.wikipedia.org/wiki/WebP) image file. Any: value: .* display_name: Any Extension diff --git a/src/schema/objects/metadata.yaml b/src/schema/objects/metadata.yaml index 347f6a53c0..747e081e9a 100644 --- a/src/schema/objects/metadata.yaml +++ b/src/schema/objects/metadata.yaml @@ -237,6 +237,42 @@ AttenuationCorrectionMethodReference: description: | Reference paper for the attenuation correction method used. type: string +AudioChannelCount: + name: AudioChannelCount + display_name: Audio Channel Count + description: | + Number of audio channels in the audio or audio-video file + (for example, `1` for mono, `2` for stereo). + type: integer + minimum: 1 +AudioCodec: + name: AudioCodec + display_name: Audio Codec + description: | + The audio codec used to encode the audio stream, expressed as an + [FFmpeg codec name](https://www.ffmpeg.org/ffmpeg-codecs.html) + (for example, `"aac"`, `"mp3"`, `"opus"`, `"flac"`, `"pcm_s16le"`). + This value can be auto-extracted using + `ffprobe -v quiet -print_format json -show_streams`. 
+ type: string +AudioCodecRFC6381: + name: AudioCodecRFC6381 + display_name: Audio Codec (RFC 6381) + description: | + The audio codec expressed as an + [RFC 6381](https://datatracker.ietf.org/doc/html/rfc6381) codec string + (for example, `"mp4a.40.2"` for AAC-LC). + This representation is useful for web and broadcast interoperability. + type: string +AudioSampleRate: + name: AudioSampleRate + display_name: Audio Sample Rate + description: | + Sampling frequency of the audio stream, in Hz + (for example, `44100`, `48000`, `96000`). + type: number + exclusiveMinimum: 0 + unit: Hz Authors: name: Authors display_name: Authors @@ -923,6 +959,13 @@ Descriptors: - type: array items: type: string +Device: + name: Device + display_name: Device + description: | + Free-form description of the device used to record the data + (for example, `"iPhone 12"`, `"Canon EOS R5"`). + type: string DeviceSerialNumber: name: DeviceSerialNumber display_name: Device Serial Number @@ -1871,6 +1914,22 @@ ImageAcquisitionProtocol: [URI](SPEC_ROOT/common-principles.md#uniform-resource-indicator) (for example from [protocols.io](https://www.protocols.io/)). type: string +ImageBitDepth: + name: ImageBitDepth + display_name: Image Bit Depth + description: | + Bit depth per channel of the stored pixel data of the video frame or + image (for example, `8`, `10`, `12`, `16`). For multi-channel data + this is the depth of each individual channel. + When `ImagePixelFormat` is also provided, this field is redundant with + the bit depth encoded in the FFmpeg `pix_fmt` value (for example, + `yuv420p10le` -> 10) and the two MUST agree. `ImageBitDepth` is + nonetheless useful as a more directly discoverable summary, and as the + primary precision field for image-only sidecars whose producing tools + do not naturally surface `pix_fmt`. 
+ type: integer + minimum: 1 + unit: bit ImageDecayCorrected: name: ImageDecayCorrected display_name: Image Decay Corrected @@ -1885,6 +1944,38 @@ ImageDecayCorrectionTime: `"TimeZero"` in the default unit seconds. type: number unit: s +ImageHeight: + name: ImageHeight + display_name: Image Height + description: | + Height of the video frame or image, in pixels. + Corresponds to the number of rows in the stored pixel grid as captured, + without applying any orientation correction that may be reported by + container metadata (for example, the EXIF `Orientation` tag). + type: integer + minimum: 1 + unit: px +ImagePixelFormat: + name: ImagePixelFormat + display_name: Image Pixel Format + description: | + The pixel format of the video frame or image, as reported by FFmpeg's + `pix_fmt` field (for example, `"yuv420p"`, `"yuv420p10le"`, `"gray16le"`, + `"rgb24"`). A single `pix_fmt` value encodes the color model, channel + count, chroma subsampling, and bit depth, and can be extracted + automatically with `ffprobe`. + type: string +ImageWidth: + name: ImageWidth + display_name: Image Width + description: | + Width of the video frame or image, in pixels. + Corresponds to the number of columns in the stored pixel grid as captured, + without applying any orientation correction that may be reported by + container metadata (for example, the EXIF `Orientation` tag). + type: integer + minimum: 1 + unit: px Immersion: name: Immersion display_name: Immersion @@ -4499,6 +4590,46 @@ VisionCorrection: Equipment used to correct participant vision during an experiment. Example: "spectacles", "lenses", "none". type: string +VideoCodec: + name: VideoCodec + display_name: Video Codec + description: | + The video codec used to encode the video stream, expressed as an + [FFmpeg codec name](https://www.ffmpeg.org/ffmpeg-codecs.html) + (for example, `"h264"`, `"hevc"`, `"vp9"`, `"av1"`). + This value can be auto-extracted using + `ffprobe -v quiet -print_format json -show_streams`. 
+ type: string +VideoCodecRFC6381: + name: VideoCodecRFC6381 + display_name: Video Codec (RFC 6381) + description: | + The video codec expressed as an + [RFC 6381](https://datatracker.ietf.org/doc/html/rfc6381) codec string + (for example, `"avc1.640028"` for H.264 High Profile Level 4.0). + This representation is useful for web and broadcast interoperability. + type: string +VideoFrameCount: + name: VideoFrameCount + display_name: Video Frame Count + description: | + Total number of frames in the video stream. + For constant frame rate video this can be derived from `VideoFrameRate` + and `RecordingDuration`, but for variable frame rate (VFR) video the + derivation is undefined, so an explicit value is needed. + Also useful as an integrity check to detect truncated or corrupted files. + type: integer + minimum: 1 +VideoFrameRate: + name: VideoFrameRate + display_name: Video Frame Rate + description: | + The video frame rate of the video stream, in Hz + (for example, `24`, `25`, `29.97`, `30`, `60`). + For variable rate videos, this value should be the nominal frame rate. + type: number + exclusiveMinimum: 0 + unit: Hz VolumeTiming: name: VolumeTiming display_name: Volume Timing @@ -4638,3 +4769,65 @@ iEEGReference: this field should have a general description and the channel specific reference should be defined in the `channels.tsv` file. type: string + +AudioDuration: + name: AudioDuration + display_name: Audio Duration + description: | + Duration of the audio recording in seconds. + type: number + exclusiveMinimum: 0 + unit: s + +AudioBitDepth: + name: AudioBitDepth + display_name: Audio Bit Depth + description: | + Number of bits per sample in the audio recording. + + Common values include `16`, `24`, or `32`. + type: integer + minimum: 1 + +CameraPosition: + name: CameraPosition + display_name: Camera Position + description: | + Free-form description of the camera placement relative to the subject or scene. 
+ + Examples include "front", "profile-left", "ceiling", "room-corner", or "overhead". + type: string + +Duration: + name: Duration + display_name: Duration + description: | + Total duration of the audio or video recording in seconds. + type: number + exclusiveMinimum: 0 + unit: s + +FrameRate: + name: FrameRate + display_name: Frame Rate + description: | + Frame rate of the video recording in frames per second (for example, `30.0`). + type: number + exclusiveMinimum: 0 + unit: Hz + +Height: + name: Height + display_name: Height + description: | + Height of the video frame or still image in pixels (for example, `1080`). + type: integer + minimum: 1 + +Width: + name: Width + display_name: Width + description: | + Width of the video frame or still image in pixels (for example, `1920`). + type: integer + minimum: 1 diff --git a/src/schema/objects/suffixes.yaml b/src/schema/objects/suffixes.yaml index 37cca59edf..6788bfcd92 100644 --- a/src/schema/objects/suffixes.yaml +++ b/src/schema/objects/suffixes.yaml @@ -516,6 +516,18 @@ asl: The complete ASL time series stored as a 4D NIfTI file in the original acquisition order, with possible volume types including: control, label, m0scan, deltam, cbf. +audio: + value: audio + display_name: Audio file + description: | + An audio data file containing one or more audio streams. + Common formats include WAV (uncompressed), MP3, AAC, and Ogg Vorbis. +audiovideo: + value: audiovideo + display_name: Audio-video file + description: | + A media file containing both audio and video streams. + Common containers include MP4, MKV, AVI, and WebM. aslcontext: value: aslcontext display_name: Arterial Spin Labeling Context @@ -666,6 +678,12 @@ ieeg: display_name: Intracranial Electroencephalography description: | Intracranial electroencephalography recording data. +image: + value: image + display_name: Image file + description: | + A still image data file. + Common formats include JPEG, PNG, SVG, WebP, and TIFF. 
inplaneT1: value: inplaneT1 display_name: Inplane T1 @@ -897,3 +915,9 @@ unloc: description: | MRS acquisitions run without localization. This includes signals detected using coil sensitivity only. +video: + value: video + display_name: Video file + description: | + A video data file containing one or more video streams but no audio. + Common containers include MP4, MKV, AVI, and WebM. diff --git a/src/schema/rules/files/raw/beh.yaml b/src/schema/rules/files/raw/beh.yaml index df6f9dac06..4d96c62b41 100644 --- a/src/schema/rules/files/raw/beh.yaml +++ b/src/schema/rules/files/raw/beh.yaml @@ -9,3 +9,88 @@ noncontinuous: - .json datatypes: - beh + entities: + subject: required + session: optional + task: required + acquisition: optional + run: optional + +# Audio recordings +audio: + suffixes: + - audio + extensions: + - .flac + - .mp3 + - .ogg + - .wav + - .json + datatypes: + - beh + entities: + subject: required + session: optional + task: optional + acquisition: optional + run: optional + recording: optional + split: optional + +# Video recordings +video: + suffixes: + - video + extensions: + - .mp4 + - .mkv + - .avi + - .json + datatypes: + - beh + entities: + subject: required + session: optional + task: optional + acquisition: optional + run: optional + recording: optional + split: optional + +# Combined audio-video recordings +audiovideo: + suffixes: + - audiovideo + extensions: + - .mp4 + - .mkv + - .avi + - .json + datatypes: + - beh + entities: + subject: required + session: optional + task: optional + acquisition: optional + run: optional + recording: optional + split: optional + +# Still images +image: + suffixes: + - image + extensions: + - .jpg + - .png + - .json + datatypes: + - beh + entities: + subject: required + session: optional + task: optional + acquisition: optional + run: optional + recording: optional diff --git a/src/schema/rules/sidecars/beh.yaml b/src/schema/rules/sidecars/beh.yaml index f2d8410914..545e7646f5 100644 --- 
a/src/schema/rules/sidecars/beh.yaml +++ b/src/schema/rules/sidecars/beh.yaml @@ -25,3 +25,48 @@ BEHInstitutionInformation: InstitutionName: recommended InstitutionAddress: recommended InstitutionalDepartmentName: recommended + +# Audio, Video, and Image metadata +AudioVideoImageDevice: + selectors: + - datatype == "beh" + - intersects([suffix], ["audio", "video", "audiovideo", "image"]) + fields: + Device: optional + DeviceSerialNumber: optional + License: optional + +AudioVideoDuration: + selectors: + - datatype == "beh" + - intersects([suffix], ["audio", "video", "audiovideo"]) + fields: + Duration: optional + +AudioStreams: + selectors: + - datatype == "beh" + - intersects([suffix], ["audio", "audiovideo"]) + fields: + AudioChannelCount: optional + AudioSampleRate: optional + AudioBitDepth: optional + +VideoStreams: + selectors: + - datatype == "beh" + - intersects([suffix], ["video", "audiovideo"]) + fields: + FrameRate: optional + Height: optional + Width: optional + CameraPosition: optional + +ImageProperties: + selectors: + - datatype == "beh" + - suffix == "image" + fields: + Height: optional + Width: optional + CameraPosition: optional diff --git a/src/schema/rules/sidecars/media.yaml b/src/schema/rules/sidecars/media.yaml new file mode 100644 index 0000000000..9e5dc25ed4 --- /dev/null +++ b/src/schema/rules/sidecars/media.yaml @@ -0,0 +1,37 @@ +# +# Groups of related metadata fields for media files +# + +--- +MediaDuration: + selectors: + - intersects([suffix], ["audio", "video", "audiovideo"]) + fields: + RecordingDuration: recommended + +MediaAudioProperties: + selectors: + - intersects([suffix], ["audio", "audiovideo"]) + fields: + AudioCodec: recommended + AudioSampleRate: recommended + AudioChannelCount: recommended + AudioCodecRFC6381: optional + +MediaImageProperties: + selectors: + - intersects([suffix], ["video", "audiovideo", "image"]) + fields: + ImageWidth: recommended + ImageHeight: recommended + ImagePixelFormat: optional + 
def make_extension_table(extensions, src_path=None):
    """Generate a markdown table of file extension information.

    Parameters
    ----------
    extensions : list of str
        A list of the extension keys to include in the table.
        Keys correspond to entries in the schema's objects.extensions
        (for example, ``["wav", "mp3", "aac", "ogg"]``).
    src_path : str or None
        The file where this macro is called, which may be explicitly provided
        by the "page.file.src_path" variable.

    Returns
    -------
    table : str
        A Markdown-format table containing the extension information,
        with one row per requested extension, in the order requested.

    Raises
    ------
    KeyError
        If a requested key is not present in the schema's objects.extensions.
    """
    if src_path is None:
        src_path = _get_source_path()

    schema_obj = schema.load_schema()
    ext_objects = schema_obj["objects"]["extensions"]

    # Compute relative paths from the calling file.
    # Markdown links must use forward slashes, also when built on Windows,
    # where os.path.relpath returns backslash-separated paths.
    src_dir = os.path.dirname(src_path)
    spec_root = os.path.relpath(".", src_dir).replace(os.sep, "/")
    glossary_path = os.path.relpath("glossary.md", src_dir).replace(os.sep, "/")

    rows = []
    for ext_key in extensions:
        try:
            ext = ext_objects[ext_key]
        except KeyError:
            # Re-raise with a message that says what was looked up and where
            raise KeyError(
                f"Unknown extension key {ext_key!r}: "
                "not found in the schema's objects.extensions"
            ) from None

        value = ext["value"]
        display_name = ext["display_name"]
        # Collapse multi-line description to single line
        description = " ".join(ext["description"].strip().split())
        # Links inside schema descriptions may be written relative to the
        # specification root using the SPEC_ROOT placeholder; resolve it
        # relative to the calling page (no-op when the placeholder is absent).
        description = description.replace("SPEC_ROOT", spec_root)
        # A literal pipe would be read as a cell boundary and break the row.
        # Normalize already-escaped pipes first so they are not escaped twice.
        description = description.replace("\\|", "|").replace("|", "\\|")

        # Link to glossary anchor
        link = f"[{value}]({glossary_path}#objects.extensions.{ext_key})"

        rows.append(f"| {display_name} | {link} | {description} |")

    header = "| **Format** | **Extension** | **Description** |"
    separator = "| --- | --- | --- |"
    return "\n".join([header, separator] + rows)
def test_make_extension_table(schema_obj):
    """Check that the extension table renders one correct row per extension.

    Exercises the ``make_extension_table`` macro from ``mkdocs_macros_bids``:
    the table must start with the header and separator rows, contain exactly
    one data row per requested extension (in request order), and each row must
    carry the extension's display name, its value, and its glossary link.
    """
    import macros as mkdocs_macros  # type: ignore[import-not-found]

    # extension key -> (rendered extension value, rendered display name)
    expectations = {
        "wav": (".wav", "Waveform Audio"),
        "mp4": (".mp4", "MPEG-4 Part 14"),
        "jpg": (".jpg", "Joint Photographic Experts Group"),
    }
    requested = ["wav", "mp4", "jpg"]

    rendered = mkdocs_macros.make_extension_table(
        requested,
        src_path="appendices/media-files.md",
    )
    lines = rendered.split("\n")

    # The first two lines are the table header and its separator
    assert lines[0].startswith("| **Format**")
    assert lines[1].startswith("| ---")

    # Everything after them is data: exactly one row per requested extension
    assert len(lines) == len(requested) + 2

    for position, key in enumerate(requested):
        row = lines[position + 2]
        extension, label = expectations[key]
        assert label in row
        assert extension in row
        # Each row links to the extension's glossary entry
        assert f"glossary.md#objects.extensions.{key}" in row