From dd1a506fefccbb54601ba8019a024759e2373839 Mon Sep 17 00:00:00 2001 From: Robert Hawkins Date: Wed, 3 Jun 2026 21:10:51 -0700 Subject: [PATCH] refactor(Syntax/Binding): derive binding class from UD morphology --- Linglib.lean | 1 - Linglib/Core/Word.lean | 6 +++ Linglib/Features/CoreferenceStatus.lean | 11 ++-- .../English/NominalClassification.lean | 52 ------------------- Linglib/Fragments/English/Pronouns.lean | 10 ++-- Linglib/Phenomena/Anaphora/Coreference.lean | 6 +-- Linglib/Studies/Chomsky1981.lean | 3 +- Linglib/Studies/Hudson1990.lean | 2 +- Linglib/Studies/Nordlinger2023.lean | 6 +-- Linglib/Studies/SagWasowBender2003.lean | 3 +- Linglib/Syntax/Binding/Basic.lean | 15 ++++++ Linglib/Syntax/DependencyGrammar/Nominal.lean | 24 +++------ Linglib/Syntax/Pronoun/Basic.lean | 6 ++- 13 files changed, 55 insertions(+), 90 deletions(-) delete mode 100644 Linglib/Fragments/English/NominalClassification.lean diff --git a/Linglib.lean b/Linglib.lean index bdf24e0df..4b5f8f877 100644 --- a/Linglib.lean +++ b/Linglib.lean @@ -650,7 +650,6 @@ import Linglib.Fragments.English.Numerals import Linglib.Fragments.English.Plurals import Linglib.Fragments.English.Reference import Linglib.Fragments.English.Pronouns -import Linglib.Fragments.English.NominalClassification import Linglib.Fragments.English.PropositionalLexemes import Linglib.Fragments.English.QuestionParticles import Linglib.Fragments.English.Scales diff --git a/Linglib/Core/Word.lean b/Linglib/Core/Word.lean index cc068cd7b..365a4fb9b 100644 --- a/Linglib/Core/Word.lean +++ b/Linglib/Core/Word.lean @@ -135,6 +135,12 @@ structure Features where vform : Option VForm := none tense : Option Tense := none countable : Option MassCount := none -- for count vs mass nouns + /-- Pronoun type (UD `PronType`: personal, reciprocal, interrogative, …). Carried + so a pro-form's binding class is read off its own morphology, not its surface form. -/ + pronType : Option UD.PronType := none + /-- Reflexive morphology (UD `Reflex=Yes`). The one binding-relevant feature `PronType` + does not encode; distinguishes a reflexive anaphor from a plain personal pronoun. -/ + reflex : Bool := false deriving Repr, DecidableEq /-- A word: form + category + features. -/ diff --git a/Linglib/Features/CoreferenceStatus.lean b/Linglib/Features/CoreferenceStatus.lean index 4c8d2196c..53c070393 100644 --- a/Linglib/Features/CoreferenceStatus.lean +++ b/Linglib/Features/CoreferenceStatus.lean @@ -24,9 +24,8 @@ inductive CoreferenceStatus where /-- The binding-theoretic class of a nominal expression, shared across the syntactic frameworks (HPSG, Dependency Grammar, Minimalism) so their - `classifyNominal` functions return the same type. The per-language - realization (which forms are which) lives in a Fragment helper (e.g. - `English.NominalClassification`). + binding-class sources return the same type. The canonical source reads the + class off a word's own UD morphology (`Binding.bindingClassOf`). The *binding-distribution* axis (anaphor/pronominal/r-expression, with the anaphor class split into reflexive/reciprocal) — orthogonal to a nominal's @@ -44,9 +43,9 @@ inductive BindingClass where deriving Repr, BEq, DecidableEq /-- A **binding-class source**: *where* an expression's `BindingClass` comes from. Theories -source it differently — a lexical declaration (`Pronoun.bindingClass`), a form-string -classifier (`English.NominalClassification.classifyNominal`), a structural sort (HPSG), or the -syntactic context (minimal-pronoun / DM). The framework-neutral binding engine is polymorphic +source it differently — a word's own UD morphology (`Binding.bindingClassOf`, the canonical +language-neutral source), a lexical declaration (`Pronoun.bindingClass`), a structural sort +(HPSG), or the syntactic context (minimal-pronoun / DM). The framework-neutral binding engine is polymorphic over the source: it takes a `BindingSource` and stays agnostic to where the class came from. -/ abbrev BindingSource (α : Type _) := α → Option BindingClass diff --git a/Linglib/Fragments/English/NominalClassification.lean b/Linglib/Fragments/English/NominalClassification.lean deleted file mode 100644 index 6c721ac2b..000000000 --- a/Linglib/Fragments/English/NominalClassification.lean +++ /dev/null @@ -1,52 +0,0 @@ -import Linglib.Core.Word -import Linglib.Features.CoreferenceStatus -import Linglib.Syntax.Pronoun.Capabilities -import Linglib.Fragments.English.Pronouns - -/-! -# English nominal classification for binding - -Lexicon-driven classification of English nominals into `Features.BindingClass` — the -binding-class **source** the coreference/binding analyses in every syntactic framework -(`Syntax.Minimalist.Coreference`, `Syntax.HPSG.Coreference`, `Syntax.DependencyGrammar`) -consume. The class is read from each lexicon entry's declared `Pronoun.bindingClass` (set at -the entry; see `English.Pronouns`), looked up by surface form — one `Features.BindingSource -Word`. Other theories may source the class differently (HPSG sorts, minimal-pronoun context); -the engine is polymorphic over the source. - -φ-feature agreement is *not* here: it is `Word.Agree` (`Core.Word`), a feature-based relation -over `UD.MorphFeatures.compatible`. - -## Main definitions - -* `classifyNominal` — the English `Features.BindingSource Word`: the binding class each - matching lexicon entry *declares*, with a UPOS fallback for R-expressions. --/ - -namespace English.NominalClassification - -open Features (BindingClass BindingSource) - -/-- Is this a nominal category (proper noun, common noun, or pronoun)? -/ -def isNominalCat (c : UD.UPOS) : Bool := - c == .PROPN || c == .NOUN || c == .PRON - -/-- The full English pro-form lexicon; each entry carries its declared `bindingClass` -(set at the entry defs in `English.Pronouns`). -/ -def lexicon : List Pronoun := - English.Pronouns.reflexives ++ English.Pronouns.reciprocals ++ English.Pronouns.whWords - ++ English.pronouns.map (·.toPronoun) - -/-- Classify an English nominal: the binding class the matching lexicon entry *declares* -(`Pronoun.bindingClass`), looked up by surface form, with a UPOS R-expression fallback. The -binding class is sourced from the entry's own declaration, not from list membership. -/ -def classifyNominal : BindingSource Word := fun w => - match lexicon.find? (·.form == w.form) with - | some e => e.bindingClass - | none => if isNominalCat w.cat then some .rExpression else none - -/-- Every reflexive entry is a Principle-A anaphor by its declaration — `Bound.IsAnaphor` -(over `bindingClass`) holds across the `reflexives` lexicon. -/ -theorem reflexives_are_anaphors : ∀ p ∈ English.Pronouns.reflexives, Bound.IsAnaphor p := by decide - -end English.NominalClassification diff --git a/Linglib/Fragments/English/Pronouns.lean b/Linglib/Fragments/English/Pronouns.lean index 47a1b7b0f..a06036fda 100644 --- a/Linglib/Fragments/English/Pronouns.lean +++ b/Linglib/Fragments/English/Pronouns.lean @@ -2,6 +2,7 @@ import Linglib.Core.Word import Linglib.Features.Case import Linglib.Features.Gender import Linglib.Syntax.Pronoun.Basic +import Linglib.Syntax.Pronoun.Capabilities import Linglib.Syntax.Pronoun.Demonstrative /-! @@ -14,9 +15,9 @@ wh-pronouns are bare `Pronoun` shells (φ-features + surface form, no referentia denotation of their own). Each entry declares its `Pronoun.bindingClass`, so a form's binding-theoretic kind -is the entry's own declaration; the lexicon lists below group them by class. -`English.NominalClassification.classifyNominal` (a `Features.BindingSource Word`) -reads that declaration. +is the entry's own declaration; the lexicon lists below group them by class. `Pronoun.toWord` +threads this onto the surface word's UD morphology (`Reflex`/`PronType`), where the +framework-neutral binding engine reads it back via `Binding.bindingClassOf`. ## Gender ([konnelly-cowper-2020]) @@ -121,6 +122,9 @@ def reciprocals : List Pronoun := [eachOther, oneAnother] /-- Wh-pronouns and wh-adverbs (Principle B pronominals); each declares `bindingClass := .pronoun`. -/ def whWords : List Pronoun := [who, whom, what, which, where_, when_, why, how] +/-- Every reflexive entry is a Principle-A anaphor by its declaration. -/ +theorem reflexives_are_anaphors : ∀ p ∈ reflexives, Bound.IsAnaphor p := by decide + end English.Pronouns namespace English diff --git a/Linglib/Phenomena/Anaphora/Coreference.lean b/Linglib/Phenomena/Anaphora/Coreference.lean index bbc633f48..fa4ebc3ca 100644 --- a/Linglib/Phenomena/Anaphora/Coreference.lean +++ b/Linglib/Phenomena/Anaphora/Coreference.lean @@ -21,12 +21,12 @@ namespace Phenomena.Anaphora.Coreference private def john : Word := ⟨"John", .PROPN, { number := some .sg, person := some .third, gender := some .Masc }⟩ private def sees : Word := ⟨"sees", .VERB, { valence := some .transitive, number := some .sg, person := some .third }⟩ -private def himself : Word := ⟨"himself", .PRON, { person := some .third, number := some .sg, gender := some .Masc }⟩ +private def himself : Word := ⟨"himself", .PRON, { person := some .third, number := some .sg, gender := some .Masc, reflex := true }⟩ private def mary : Word := ⟨"Mary", .PROPN, { number := some .sg, person := some .third, gender := some .Fem }⟩ -private def herself : Word := ⟨"herself", .PRON, { person := some .third, number := some .sg, gender := some .Fem }⟩ +private def herself : Word := ⟨"herself", .PRON, { person := some .third, number := some .sg, gender := some .Fem, reflex := true }⟩ private def they : Word := ⟨"they", .PRON, { person := some .third, number := some .pl, case_ := some .nom }⟩ private def see : Word := ⟨"see", .VERB, { valence := some .transitive, number := some .pl }⟩ -private def themselves : Word := ⟨"themselves", .PRON, { person := some .third, number := some .pl }⟩ +private def themselves : Word := ⟨"themselves", .PRON, { person := some .third, number := some .pl, reflex := true }⟩ private def him : Word := ⟨"him", .PRON, { person := some .third, number := some .sg, case_ := some .acc, gender := some .Masc }⟩ private def her : Word := ⟨"her", .PRON, { person := some .third, number := some .sg, case_ := some .acc, gender := some .Fem }⟩ private def he : Word := ⟨"he", .PRON, { person := some .third, number := some .sg, case_ := some .nom, gender := some .Masc }⟩ diff --git a/Linglib/Studies/Chomsky1981.lean b/Linglib/Studies/Chomsky1981.lean index 76fc61ed4..29bff104c 100644 --- a/Linglib/Studies/Chomsky1981.lean +++ b/Linglib/Studies/Chomsky1981.lean @@ -2,7 +2,6 @@ import Linglib.Syntax.Minimalist.Coreference import Linglib.Phenomena.Anaphora.Coreference import Linglib.Fragments.English.Nouns import Linglib.Fragments.English.Pronouns -import Linglib.Fragments.English.NominalClassification import Linglib.Fragments.English.Predicates.Verbal import Linglib.Paradigms.AcceptabilityJudgment @@ -41,7 +40,7 @@ open Phenomena.Anaphora.Coreference `CommandRelation` instance (in scope via `open Minimalist.Coreference`) and English's binding-class classifier. -/ private abbrev grammaticalForCoreference (ws : List Word) : Prop := - Binding.grammaticalForCoreference English.NominalClassification.classifyNominal ws + Binding.grammaticalForCoreference Binding.bindingClassOf ws /-- Coverage of a `PhenomenonData` set under Minimalist binding theory. diff --git a/Linglib/Studies/Hudson1990.lean b/Linglib/Studies/Hudson1990.lean index f9dafd02a..1b43b4e7a 100644 --- a/Linglib/Studies/Hudson1990.lean +++ b/Linglib/Studies/Hudson1990.lean @@ -27,7 +27,7 @@ open Phenomena.Anaphora.Coreference `CommandRelation` instance (in scope via `open DepGrammar.Coreference`) and English's binding-class classifier. `Bool`-valued for `capturesPhenomenonData`. -/ private def grammaticalForCoreference (ws : List Word) : Bool := - decide (Binding.grammaticalForCoreference classifyNominal ws) + decide (Binding.grammaticalForCoreference Binding.bindingClassOf ws) -- ============================================================================ -- Capturing the Phenomena Data diff --git a/Linglib/Studies/Nordlinger2023.lean b/Linglib/Studies/Nordlinger2023.lean index c8b44db0e..ad53c7cb0 100644 --- a/Linglib/Studies/Nordlinger2023.lean +++ b/Linglib/Studies/Nordlinger2023.lean @@ -2,7 +2,7 @@ import Linglib.Typology.ArgumentStructure import Linglib.Studies.Siloni2012 import Linglib.Data.WALS.Features.F106A import Linglib.Fragments.English.Pronouns -import Linglib.Fragments.English.NominalClassification +import Linglib.Syntax.Binding.Basic import Linglib.Fragments.Swahili.Reciprocals /-! @@ -465,8 +465,8 @@ open English.Pronouns in theorem english_profile_grounded : rp_english.primaryStrategy = .bipartiteNP ∧ rp_english.valency = .bivalent ∧ - English.NominalClassification.classifyNominal eachOther.toWord = some .reciprocal ∧ - English.NominalClassification.classifyNominal eachOther.toWord ≠ some .reflexive := by + Binding.bindingClassOf eachOther.toWord = some .reciprocal ∧ + Binding.bindingClassOf eachOther.toWord ≠ some .reflexive := by refine ⟨rfl, rfl, ?_, ?_⟩ <;> decide -- ============================================================================ diff --git a/Linglib/Studies/SagWasowBender2003.lean b/Linglib/Studies/SagWasowBender2003.lean index 58491b773..aaf604e1d 100644 --- a/Linglib/Studies/SagWasowBender2003.lean +++ b/Linglib/Studies/SagWasowBender2003.lean @@ -2,7 +2,6 @@ import Linglib.Syntax.HPSG.Coreference import Linglib.Phenomena.Anaphora.Coreference import Linglib.Fragments.English.Nouns import Linglib.Fragments.English.Pronouns -import Linglib.Fragments.English.NominalClassification import Linglib.Fragments.English.Predicates.Verbal import Linglib.Paradigms.AcceptabilityJudgment @@ -36,7 +35,7 @@ open Phenomena.Anaphora.Coreference instance (in scope via `open HPSG.Coreference`) and English's binding-class classifier. `Bool`-valued for `capturesPhenomenonData`. -/ private def grammaticalForCoreference (ws : List Word) : Bool := - decide (Binding.grammaticalForCoreference English.NominalClassification.classifyNominal ws) + decide (Binding.grammaticalForCoreference Binding.bindingClassOf ws) /-- Coverage of a `PhenomenonData` set under HPSG binding. -/ def capturesCoreferenceData (phenom : PhenomenonData) : Bool := diff --git a/Linglib/Syntax/Binding/Basic.lean b/Linglib/Syntax/Binding/Basic.lean index 4dc5dda3a..3afbddd43 100644 --- a/Linglib/Syntax/Binding/Basic.lean +++ b/Linglib/Syntax/Binding/Basic.lean @@ -74,6 +74,21 @@ def SimpleClause.at? (c : SimpleClause) : Pos → Option Word def isNominalCat (cat : UD.UPOS) : Bool := cat == .PROPN || cat == .NOUN || cat == .PRON +/-- The canonical, language-neutral binding-class source: a word's Principle A/B/C class read off + its own UD morphology (`Reflex`, `PronType`) and category — *no* lexicon and *no* surface-form + lookup. Reflexive morphology → anaphor; reciprocal `PronType` → reciprocal anaphor; any other + pronoun → pronominal; a proper/common-noun category → R-expression; a non-nominal → `none`. + This is the framework- *and* language-neutral default `Features.BindingSource Word`, replacing + per-language form-string classifiers ([chomsky-1981]'s A/B/C classes as morphology). -/ +def bindingClassOf : Features.BindingSource Word := fun w => + if w.features.reflex then some .reflexive + else match w.features.pronType with + | some .Rcp => some .reciprocal + | _ => + if w.cat == .PRON then some .pronoun + else if isNominalCat w.cat then some .rExpression + else none + /-- Parse a surface word list into a simple clause: `[subj, verb, obj]` or `[subj, verb]`, requiring nominal subject/object and a verb. -/ def parseSimpleClause (ws : List Word) : Option SimpleClause := diff --git a/Linglib/Syntax/DependencyGrammar/Nominal.lean b/Linglib/Syntax/DependencyGrammar/Nominal.lean index 7ff3c9815..a3fb7ba02 100644 --- a/Linglib/Syntax/DependencyGrammar/Nominal.lean +++ b/Linglib/Syntax/DependencyGrammar/Nominal.lean @@ -1,37 +1,29 @@ import Linglib.Core.Word import Linglib.Features.CoreferenceStatus -import Linglib.Fragments.English.NominalClassification import Linglib.Fragments.English.Nouns import Linglib.Fragments.English.Pronouns import Linglib.Fragments.English.Predicates.Verbal /-! -# Shared nominal classification for DG coreference theories +# Shared English test words for DG coreference theories -Re-exports lexicon-driven nominal classification and φ-feature agreement -from `Fragments.English.NominalClassification` under the -`DepGrammar.Nominal` namespace, plus a small bundle of English test words -consumed by paper-anchored Studies (`Studies/Hudson1990.lean`). - -The Fragments → Theory edge here is a known layer-discipline shortcut: -the binding-substrate API is lexicon-dependent and only English has a -lexicon implementation so far. A future refactor should hoist the -classification API to `Features/Binding` (theory-neutral) with per-language -Fragments providing instances; the test-word lexemes should move to -`Studies/Hudson1990.lean`. +A small bundle of English test words consumed by paper-anchored Studies +(`Studies/Hudson1990.lean`), plus a `BindingClass` re-export. Binding-class +classification is no longer re-exported here: a word's Principle A/B/C class is read +off its own UD morphology by the framework- and language-neutral `Binding.bindingClassOf`, +so no per-language lexicon classifier is needed. ## Main declarations -* `BindingClass`, `isNominalCat`, `classifyNominal` — re-exported. φ-agreement - is `Word.Agree` (Core), not re-exported here. +* `BindingClass` — re-exported. φ-agreement is `Word.Agree` (Core). * `john`, `mary`, `they`, `sees`, `see`, `himself`, `herself`, `themselves`, `him`, `her`, `them`, `eachOther` — English test words. + Slated for relocation to `Studies/Hudson1990.lean` (auditor finding 2026-06-01). -/ namespace DepGrammar.Nominal export Features (BindingClass) -export English.NominalClassification (isNominalCat classifyNominal) /-! ### Shared English test words diff --git a/Linglib/Syntax/Pronoun/Basic.lean b/Linglib/Syntax/Pronoun/Basic.lean index 36f7a02bd..0a9726d39 100644 --- a/Linglib/Syntax/Pronoun/Basic.lean +++ b/Linglib/Syntax/Pronoun/Basic.lean @@ -113,7 +113,11 @@ open Features (SurfaceGender) def toWord (p : Pronoun) : Word := { form := p.form, cat := .PRON, features := { person := p.person, number := p.number, case_ := p.case_, - gender := p.gender.bind (·.toUDGender) } } + gender := p.gender.bind (·.toUDGender), + -- carry the binding-relevant morphology so a projected pro-form's class is + -- read off its own features, not recovered by surface-form lookup + reflex := p.bindingClass == some .reflexive, + pronType := if p.bindingClass == some .reciprocal then some .Rcp else none } } /-! ### Derived person category and well-formedness ([cysouw-2009]) -/