diff --git a/classification/SingleAlphaClassification.js b/classification/SingleAlphaClassification.js
new file mode 100644
index 00000000..8f807b48
--- /dev/null
+++ b/classification/SingleAlphaClassification.js
@@ -0,0 +1,20 @@
+const Classification = require('./Classification')
+
+/**
+ * Classification labelled 'single_alpha'.
+ *
+ * AlphaNumericClassifier adds it, alongside AlphaClassification, to spans
+ * whose `norm` has a length of exactly 1.
+ */
+class SingleAlphaClassification extends Classification {
+  /**
+   * @param {number} [confidence] - passed through to Classification (the test expects 1.0 when omitted)
+   * @param {object} [meta] - passed through to Classification (the test expects {} when omitted)
+   */
+  constructor (confidence, meta) {
+    super(confidence, meta)
+    this.label = 'single_alpha'
+  }
+}
+
+module.exports = SingleAlphaClassification
diff --git a/classification/SingleAlphaClassification.test.js b/classification/SingleAlphaClassification.test.js
new file mode 100644
index 00000000..82358f10
--- /dev/null
+++ b/classification/SingleAlphaClassification.test.js
@@ -0,0 +1,24 @@
+const Classification = require('./SingleAlphaClassification')
+
+module.exports.tests = {}
+
+module.exports.tests.constructor = (test) => {
+  test('constructor', (t) => {
+    let c = new Classification()
+    t.false(c.public)
+    t.equals(c.label, 'single_alpha')
+    t.equals(c.confidence, 1.0)
+    t.deepEqual(c.meta, {})
+    t.end()
+  })
+}
+
+module.exports.all = (tape, common) => {
+  function test (name, testFunction) {
+    return tape(`SingleAlphaClassification: ${name}`, testFunction)
+  }
+
+  for (var testCase in module.exports.tests) {
+    module.exports.tests[testCase](test, common)
+  }
+}
diff --git a/classification/SubdivisionClassification.js b/classification/SubdivisionClassification.js
new file mode 100644
index 00000000..413fe373
--- /dev/null
+++ b/classification/SubdivisionClassification.js
@@ -0,0 +1,20 @@
+const Classification = require('./Classification')
+
+/**
+ * Classification labelled 'subdivision'.
+ *
+ * SubdivisionClassifier adds it to spans whose `norm` is a key of its
+ * dictionary index.
+ */
+class SubdivisionClassification extends Classification {
+  /**
+   * @param {number} [confidence] - passed through to Classification (the test expects 1.0 when omitted)
+   * @param {object} [meta] - passed through to Classification (the test expects {} when omitted)
+   */
+  constructor (confidence, meta) {
+    super(confidence, meta)
+    this.label = 'subdivision'
+  }
+}
+
+module.exports = SubdivisionClassification
diff --git a/classification/SubdivisionClassification.test.js b/classification/SubdivisionClassification.test.js
new file mode 100644
index 00000000..29824fdd
--- /dev/null
+++ 
b/classification/SubdivisionClassification.test.js
@@ -0,0 +1,24 @@
+const Classification = require('./SubdivisionClassification')
+
+module.exports.tests = {}
+
+module.exports.tests.constructor = (test) => {
+  test('constructor', (t) => {
+    let c = new Classification()
+    t.false(c.public)
+    t.equals(c.label, 'subdivision')
+    t.equals(c.confidence, 1.0)
+    t.deepEqual(c.meta, {})
+    t.end()
+  })
+}
+
+module.exports.all = (tape, common) => {
+  function test (name, testFunction) {
+    return tape(`SubdivisionClassification: ${name}`, testFunction)
+  }
+
+  for (var testCase in module.exports.tests) {
+    module.exports.tests[testCase](test, common)
+  }
+}
diff --git a/classifier/AlphaNumericClassifier.js b/classifier/AlphaNumericClassifier.js
index 195c9d2d..03ce3736 100644
--- a/classifier/AlphaNumericClassifier.js
+++ b/classifier/AlphaNumericClassifier.js
@@ -3,6 +3,7 @@ const AlphaClassification = require('../classification/AlphaClassification')
 const NumericClassification = require('../classification/NumericClassification')
 const AlphaNumericClassification = require('../classification/AlphaNumericClassification')
 const PunctuationClassification = require('../classification/PunctuationClassification')
+const SingleAlphaClassification = require('../classification/SingleAlphaClassification')
 
 class AlphaNumericClassifier extends WordClassifier {
   each (span) {
@@ -14,6 +15,10 @@ class AlphaNumericClassifier extends WordClassifier {
       span.classify(new PunctuationClassification(1))
     } else {
       span.classify(new AlphaClassification(1))
+      // a lone letter is additionally labelled so that schemes can match it by name
+      if (span.norm.length === 1) {
+        span.classify(new SingleAlphaClassification(1))
+      }
     }
   }
 }
diff --git a/classifier/AlphaNumericClassifier.test.js b/classifier/AlphaNumericClassifier.test.js
index c208b22b..a4513d42 100644
--- a/classifier/AlphaNumericClassifier.test.js
+++ b/classifier/AlphaNumericClassifier.test.js
@@ -3,6 +3,7 @@ const AlphaClassification = require('../classification/AlphaClassification')
 const NumericClassification = require('../classification/NumericClassification')
 const AlphaNumericClassification = require('../classification/AlphaNumericClassification')
 const PunctuationClassification = require('../classification/PunctuationClassification')
+const SingleAlphaClassification = require('../classification/SingleAlphaClassification')
 const Span = require('../tokenization/Span')
 const classifier = new AlphaNumericClassifier()
 
@@ -17,7 +18,10 @@ function classify (body) {
 module.exports.tests.alpha = (test) => {
   test('AlphaClassification: English letter', (t) => {
     let s = classify('A')
-    t.deepEqual(s.classifications, { AlphaClassification: new AlphaClassification(1.0) })
+    t.deepEqual(s.classifications, {
+      AlphaClassification: new AlphaClassification(1.0),
+      SingleAlphaClassification: new SingleAlphaClassification(1.0)
+    })
     t.end()
   })
   test('AlphaClassification: English mixed-case word', (t) => {
diff --git a/classifier/SubdivisionClassifier.js b/classifier/SubdivisionClassifier.js
new file mode 100644
index 00000000..62112ad7
--- /dev/null
+++ b/classifier/SubdivisionClassifier.js
@@ -0,0 +1,36 @@
+const PhraseClassifier = require('./super/PhraseClassifier')
+const SubdivisionClassification = require('../classification/SubdivisionClassification')
+const libpostal = require('../resources/libpostal/libpostal')
+
+/**
+ * Adds a SubdivisionClassification to a span whose normalised text is a key
+ * of the index loaded from the 'fr' subdivisions.txt dictionary.
+ */
+class SubdivisionClassifier extends PhraseClassifier {
+  // populate this.index via the libpostal dictionary loader
+  setup () {
+    this.index = {}
+    libpostal.load(this.index, ['fr'], 'subdivisions.txt', {
+      lowercase: true,
+      minlength: 3 // prevent very short names being indexed
+    })
+  }
+
+  /**
+   * Classify one span in place; spans containing numerals are ignored.
+   * @param {object} span - reads span.contains.numerals and span.norm
+   */
+  each (span) {
+    // skip spans which contain numbers
+    if (span.contains.numerals) { return }
+
+    // use an inverted index for full token matching as it's O(1).
+    // hasOwnProperty is borrowed from Object.prototype so the lookup never
+    // depends on which keys the dictionary loader wrote into the index.
+    if (Object.prototype.hasOwnProperty.call(this.index, span.norm)) {
+      span.classify(new SubdivisionClassification(1))
+    }
+  }
+}
+
+module.exports = SubdivisionClassifier
diff --git a/classifier/SubdivisionClassifier.test.js b/classifier/SubdivisionClassifier.test.js
new file mode 100644
index 
00000000..2fa0ae28
--- /dev/null
+++ b/classifier/SubdivisionClassifier.test.js
@@ -0,0 +1,45 @@
+const SubdivisionClassifier = require('./SubdivisionClassifier')
+const SubdivisionClassification = require('../classification/SubdivisionClassification')
+const Span = require('../tokenization/Span')
+const classifier = new SubdivisionClassifier()
+
+module.exports.tests = {}
+
+// build a span from the text and pass it through the classifier once
+const classify = (body) => {
+  const span = new Span(body)
+  classifier.each(span, null, 1)
+  return span
+}
+
+// a span flagged as containing numerals must stay unclassified
+module.exports.tests.contains_numerals = (test) => {
+  test('contains numerals: honours contains.numerals boolean', (t) => {
+    const span = new Span('example')
+    span.contains.numerals = true
+    classifier.each(span, null, 1)
+    t.deepEqual(span.classifications, {})
+    t.end()
+  })
+}
+
+// each listed token must end up with exactly one SubdivisionClassification
+module.exports.tests.classify = (test) => {
+  for (const token of ['Bis', 'Ter']) {
+    test(`classify: ${token}`, (t) => {
+      const span = classify(token)
+      const expected = { SubdivisionClassification: new SubdivisionClassification(1.0) }
+      t.deepEqual(span.classifications, expected)
+      t.end()
+    })
+  }
+}
+
+// register every test case with tape under a shared name prefix
+module.exports.all = (tape, common) => {
+  const test = (name, testFunction) => tape(`SubdivisionClassifier: ${name}`, testFunction)
+
+  Object.keys(module.exports.tests).forEach((testCase) => {
+    module.exports.tests[testCase](test, common)
+  })
+}
diff --git a/classifier/scheme/street_name.js b/classifier/scheme/street_name.js
index 9b59f771..62e13900 100644
--- a/classifier/scheme/street_name.js
+++ b/classifier/scheme/street_name.js
@@ -2,12 +2,12 @@ const StreetNameClassification = require('../../classification/StreetNameClassif
 
 module.exports = [
   {
-    // dos Fiéis
+    // dos Fiéis, a Santa
     confidence: 0.5,
     Class: StreetNameClassification,
     scheme: [
       {
-        is: ['StopWordClassification'],
+        is: ['StopWordClassification', 'SingleAlphaClassification'],
         not: ['DirectionalClassification', 'IntersectionClassification']
       },
       {
@@ -17,16 +17,16 @@ module.exports = [
     ]
   },
   {
-    // Academia das Ciências
+    // Academia das Ciências, Sol a Santa
     confidence: 0.5,
     Class: StreetNameClassification,
     scheme: [
       {
         is: ['AlphaClassification'],
-        not: ['StreetClassification', 'IntersectionClassification', 'StopWordClassification', 'StreetPrefixClassification']
+        not: ['StreetClassification', 'IntersectionClassification', 'StopWordClassification', 'SingleAlphaClassification', 'StreetPrefixClassification']
       },
       {
-        is: ['StopWordClassification'],
+        is: ['StopWordClassification', 'SingleAlphaClassification'],
         not: ['DirectionalClassification']
       },
       {
diff --git a/classifier/scheme/subdivision.js b/classifier/scheme/subdivision.js
index c691a953..857dccb4 100644
--- a/classifier/scheme/subdivision.js
+++ b/classifier/scheme/subdivision.js
@@ -11,7 +11,7 @@ module.exports = [
         not: ['IntersectionClassification']
       },
       {
-        is: ['StopWordClassification'],
+        is: ['SubdivisionClassification'],
         not: ['IntersectionClassification', 'PunctuationClassification']
       }
     ]
diff --git a/parser/AddressParser.js b/parser/AddressParser.js
index f88eff48..f3eccf1e 100644
--- a/parser/AddressParser.js
+++ b/parser/AddressParser.js
@@ -26,6 +26,7 @@ const CentralEuropeanStreetNameClassifier = require('../classifier/CentralEurope
 const CompositeClassifier = require('../classifier/CompositeClassifier')
 const WhosOnFirstClassifier = require('../classifier/WhosOnFirstClassifier')
 // const AdjacencyClassifier = require('../classifier/AdjacencyClassifier')
+const SubdivisionClassifier = require('../classifier/SubdivisionClassifier')
 const ExclusiveCartesianSolver = require('../solver/ExclusiveCartesianSolver')
 const LeadingAreaDeclassifier = require('../solver/LeadingAreaDeclassifier')
 const MultiStreetSolver = require('../solver/MultiStreetSolver')
@@ -71,6 +72,7 @@ class AddressParser extends Parser {
         new ChainClassifier(),
         new PlaceClassifier(),
         new WhosOnFirstClassifier(),
+        new SubdivisionClassifier(),
 
         // composite classifiers
         new CompositeClassifier(require('../classifier/scheme/person')),
diff --git 
a/resources/pelias/dictionaries/libpostal/en/stopwords.txt b/resources/pelias/dictionaries/libpostal/en/stopwords.txt
new file mode 100644
index 00000000..567568f0
--- /dev/null
+++ b/resources/pelias/dictionaries/libpostal/en/stopwords.txt
@@ -0,0 +1,2 @@
+!in
+!a
\ No newline at end of file
diff --git a/resources/pelias/dictionaries/libpostal/fr/stopwords.txt b/resources/pelias/dictionaries/libpostal/fr/subdivisions.txt
similarity index 100%
rename from resources/pelias/dictionaries/libpostal/fr/stopwords.txt
rename to resources/pelias/dictionaries/libpostal/fr/subdivisions.txt
diff --git a/test/address.esp.test.js b/test/address.esp.test.js
index 643386e0..62cd9ebd 100644
--- a/test/address.esp.test.js
+++ b/test/address.esp.test.js
@@ -6,6 +6,12 @@ const testcase = (test, common) => {
     { postcode: '08011' }, { locality: 'Barcelona' }
   ])
 
+  // note: the desired behaviour is that the lone trailing 'B' is NOT merged into
+  // the housenumber, as it is more likely to be part of the administrative unit.
+  assert('Calle Principal 20 B', [
+    { street: 'Calle Principal' }, { housenumber: '20' }
+  ])
+
   assert('Calle Principal 20 Barcelona', [
     { street: 'Calle Principal' }, { housenumber: '20' },
     { locality: 'Barcelona' }
diff --git a/test/address.fra.test.js b/test/address.fra.test.js
index 197965dd..8feb457f 100644
--- a/test/address.fra.test.js
+++ b/test/address.fra.test.js
@@ -133,6 +133,12 @@ const testcase = (test, common) => {
     { locality: 'Châtillon' },
     { country: 'France' }
   ])
+  assert(`1 bis Rue Ballainvilliers 63000 Clermont-Ferrand`, [
+    { housenumber: '1 bis' }, // the 'bis' suffix stays attached to the number
+    { street: 'Rue Ballainvilliers' },
+    { postcode: '63000' },
+    { locality: 'Clermont-Ferrand' }
+  ])
 }
 
 module.exports.all = (tape, common) => {
diff --git a/test/address.usa.test.js b/test/address.usa.test.js
index d31aa74a..6eeaf681 100644
--- a/test/address.usa.test.js
+++ b/test/address.usa.test.js
@@ -293,8 +293,21 @@ const testcase = (test, common) => {
 
   // https://github.com/pelias/parser/pull/179
   assert('10 A Main Street', [
-    { housenumber: '10 A' },
-    { street: 'Main Street' }
+    { housenumber: '10' }, // the lone 'A' is no longer absorbed into the housenumber...
+    { street: 'A Main Street' } // ...it now leads the street name instead
+  ])
+
+  // https://github.com/pelias/parser/issues/191 ('IN' must parse as a region; NOTE(review): presumably relies on '!in' in en/stopwords.txt - confirm)
+  assert('Indianapolis, IN', [
+    { locality: 'Indianapolis' },
+    { region: 'IN' }
+  ])
+  assert('1441 Brown Ave, Whiting, IN 46394', [
+    { housenumber: '1441' },
+    { street: 'Brown Ave' },
+    { locality: 'Whiting' },
+    { region: 'IN' },
+    { postcode: '46394' }
   ])
 }
 