Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions classification/SingleAlphaClassification.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
const Classification = require('./Classification')

/**
 * Classification carrying the label 'single_alpha'.
 *
 * The constructor forwards both arguments unchanged to the base
 * Classification constructor and then sets the label.
 */
class SingleAlphaClassification extends Classification {
  /**
   * @param {*} confidence - forwarded as-is to the Classification constructor
   * @param {*} meta - forwarded as-is to the Classification constructor
   */
  constructor (confidence, meta) {
    super(confidence, meta)

    // the label is what distinguishes this classification from its siblings
    this.label = 'single_alpha'
  }
}

module.exports = SingleAlphaClassification
24 changes: 24 additions & 0 deletions classification/SingleAlphaClassification.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
const Classification = require('./SingleAlphaClassification')

module.exports.tests = {}

// Registers the constructor test: an instance built with no arguments is
// expected to expose the 'single_alpha' label, a confidence of 1.0, an empty
// meta object and a `public` value that passes `t.false`.
module.exports.tests.constructor = (test) => {
  test('constructor', (t) => {
    const instance = new Classification()

    t.false(instance.public)
    t.equals(instance.label, 'single_alpha')
    t.equals(instance.confidence, 1.0)
    t.deepEqual(instance.meta, {})

    t.end()
  })
}

/**
 * Runs every test registered on `module.exports.tests`.
 *
 * @param {Function} tape - test function; called as `tape(name, testFunction)`
 * @param {*} common - passed through untouched to each registered test case
 */
module.exports.all = (tape, common) => {
  // prefix every test name with the suite name
  const test = (name, testFunction) => tape(`SingleAlphaClassification: ${name}`, testFunction)

  for (const caseName in module.exports.tests) {
    module.exports.tests[caseName](test, common)
  }
}
10 changes: 10 additions & 0 deletions classification/SubdivisionClassification.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
const Classification = require('../classification/Classification')

/**
 * Classification carrying the label 'subdivision'.
 *
 * The constructor forwards both arguments unchanged to the base
 * Classification constructor and then sets the label.
 */
class SubdivisionClassification extends Classification {
  /**
   * @param {*} confidence - forwarded as-is to the Classification constructor
   * @param {*} meta - forwarded as-is to the Classification constructor
   */
  constructor (confidence, meta) {
    super(confidence, meta)

    // the label is what distinguishes this classification from its siblings
    this.label = 'subdivision'
  }
}

module.exports = SubdivisionClassification
24 changes: 24 additions & 0 deletions classification/SubdivisionClassification.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
const Classification = require('./SubdivisionClassification')

module.exports.tests = {}

// Registers the constructor test: an instance built with no arguments is
// expected to expose the 'subdivision' label, a confidence of 1.0, an empty
// meta object and a `public` value that passes `t.false`.
module.exports.tests.constructor = (test) => {
  test('constructor', (t) => {
    const instance = new Classification()

    t.false(instance.public)
    t.equals(instance.label, 'subdivision')
    t.equals(instance.confidence, 1.0)
    t.deepEqual(instance.meta, {})

    t.end()
  })
}

/**
 * Runs every test registered on `module.exports.tests`.
 *
 * @param {Function} tape - test function; called as `tape(name, testFunction)`
 * @param {*} common - passed through untouched to each registered test case
 */
module.exports.all = (tape, common) => {
  // prefix every test name with the suite name
  const test = (name, testFunction) => tape(`SubdivisionClassification: ${name}`, testFunction)

  for (const caseName in module.exports.tests) {
    module.exports.tests[caseName](test, common)
  }
}
4 changes: 4 additions & 0 deletions classifier/AlphaNumericClassifier.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ const AlphaClassification = require('../classification/AlphaClassification')
const NumericClassification = require('../classification/NumericClassification')
const AlphaNumericClassification = require('../classification/AlphaNumericClassification')
const PunctuationClassification = require('../classification/PunctuationClassification')
const SingleAlphaClassification = require('../classification/SingleAlphaClassification')

class AlphaNumericClassifier extends WordClassifier {
each (span) {
Expand All @@ -14,6 +15,9 @@ class AlphaNumericClassifier extends WordClassifier {
span.classify(new PunctuationClassification(1))
} else {
span.classify(new AlphaClassification(1))
if (span.norm.length === 1) {
span.classify(new SingleAlphaClassification(1))
}
}
}
}
Expand Down
6 changes: 5 additions & 1 deletion classifier/AlphaNumericClassifier.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ const AlphaClassification = require('../classification/AlphaClassification')
const NumericClassification = require('../classification/NumericClassification')
const AlphaNumericClassification = require('../classification/AlphaNumericClassification')
const PunctuationClassification = require('../classification/PunctuationClassification')
const SingleAlphaClassification = require('../classification/SingleAlphaClassification')
const Span = require('../tokenization/Span')
const classifier = new AlphaNumericClassifier()

Expand All @@ -17,7 +18,10 @@ function classify (body) {
module.exports.tests.alpha = (test) => {
test('AlphaClassification: English letter', (t) => {
let s = classify('A')
t.deepEqual(s.classifications, { AlphaClassification: new AlphaClassification(1.0) })
t.deepEqual(s.classifications, {
AlphaClassification: new AlphaClassification(1.0),
SingleAlphaClassification: new SingleAlphaClassification(1.0)
})
t.end()
})
test('AlphaClassification: English mixed-case word', (t) => {
Expand Down
25 changes: 25 additions & 0 deletions classifier/SubdivisionClassifier.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
const PhraseClassifier = require('./super/PhraseClassifier')
const SubdivisionClassification = require('../classification/SubdivisionClassification')
const libpostal = require('../resources/libpostal/libpostal')

/**
 * Classifies a span as a SubdivisionClassification when its `norm` value is
 * an exact key of an index loaded from the libpostal 'subdivisions.txt'
 * dictionary (currently the 'fr' language only).
 */
class SubdivisionClassifier extends PhraseClassifier {
  /**
   * Creates `this.index` and asks libpostal to populate it.
   * NOTE(review): presumably invoked by the base classifier before `each()`
   * is used - confirm against the PhraseClassifier hierarchy.
   */
  setup () {
    this.index = {}
    libpostal.load(this.index, ['fr'], 'subdivisions.txt', {
      lowercase: true,
      minlength: 3 // prevent very short names being indexed
    })
  }

  /**
   * Classifies a single span in place.
   *
   * @param {object} span - reads `span.contains.numerals` and `span.norm`;
   *   `span.classify()` is called with a confidence of 1 on a match
   */
  each (span) {
    // skip spans which contain numbers
    if (span.contains.numerals) { return }

    // use an inverted index for full token matching as it's O(1).
    // the check goes through Object.prototype so that a dictionary entry
    // stored under the key 'hasOwnProperty' can never shadow the method.
    if (Object.prototype.hasOwnProperty.call(this.index, span.norm)) {
      span.classify(new SubdivisionClassification(1))
    }
  }
}

module.exports = SubdivisionClassifier
49 changes: 49 additions & 0 deletions classifier/SubdivisionClassifier.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
const SubdivisionClassifier = require('./SubdivisionClassifier')
const SubdivisionClassification = require('../classification/SubdivisionClassification')
const Span = require('../tokenization/Span')
const classifier = new SubdivisionClassifier()

module.exports.tests = {}

/**
 * Test helper: wraps `body` in a new Span, passes it to the shared
 * classifier's `each()` and returns the span for inspection.
 *
 * @param {string} body - text used to construct the Span
 * @returns {Span} the span after `classifier.each()` has run on it
 */
function classify (body) {
  const span = new Span(body)
  classifier.each(span, null, 1)
  return span
}

// Registers a test asserting that a span flagged as containing numerals
// receives no classifications from the classifier.
module.exports.tests.contains_numerals = (test) => {
  test('contains numerals: honours contains.numerals boolean', (t) => {
    const span = new Span('example')

    // force the flag regardless of the span text
    span.contains.numerals = true

    classifier.each(span, null, 1)
    t.deepEqual(span.classifications, {})
    t.end()
  })
}

// Registers one test per known-valid token, each asserting that the token is
// classified as exactly one SubdivisionClassification with confidence 1.0.
module.exports.tests.classify = (test) => {
  const valid = ['Bis', 'Ter']

  for (const token of valid) {
    test(`classify: ${token}`, (t) => {
      const span = classify(token)
      const expected = {
        SubdivisionClassification: new SubdivisionClassification(1.0)
      }

      t.deepEqual(span.classifications, expected)
      t.end()
    })
  }
}

/**
 * Runs every test registered on `module.exports.tests`.
 *
 * @param {Function} tape - test function; called as `tape(name, testFunction)`
 * @param {*} common - passed through untouched to each registered test case
 */
module.exports.all = (tape, common) => {
  // prefix every test name with the suite name
  const test = (name, testFunction) => tape(`SubdivisionClassifier: ${name}`, testFunction)

  for (const caseName in module.exports.tests) {
    module.exports.tests[caseName](test, common)
  }
}
10 changes: 5 additions & 5 deletions classifier/scheme/street_name.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@ const StreetNameClassification = require('../../classification/StreetNameClassif

module.exports = [
{
// dos Fiéis
// dos Fiéis, a Santa
confidence: 0.5,
Class: StreetNameClassification,
scheme: [
{
is: ['StopWordClassification'],
is: ['StopWordClassification', 'SingleAlphaClassification'],
not: ['DirectionalClassification', 'IntersectionClassification']
},
{
Expand All @@ -17,16 +17,16 @@ module.exports = [
]
},
{
// Academia das Ciências
// Academia das Ciências, Sol a Santa
confidence: 0.5,
Class: StreetNameClassification,
scheme: [
{
is: ['AlphaClassification'],
not: ['StreetClassification', 'IntersectionClassification', 'StopWordClassification', 'StreetPrefixClassification']
not: ['StreetClassification', 'IntersectionClassification', 'StopWordClassification', 'SingleAlphaClassification', 'StreetPrefixClassification']
},
{
is: ['StopWordClassification'],
is: ['StopWordClassification', 'SingleAlphaClassification'],
not: ['DirectionalClassification']
},
{
Expand Down
2 changes: 1 addition & 1 deletion classifier/scheme/subdivision.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ module.exports = [
not: ['IntersectionClassification']
},
{
is: ['StopWordClassification'],
is: ['SubdivisionClassification'],
not: ['IntersectionClassification', 'PunctuationClassification']
}
]
Expand Down
2 changes: 2 additions & 0 deletions parser/AddressParser.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ const CentralEuropeanStreetNameClassifier = require('../classifier/CentralEurope
const CompositeClassifier = require('../classifier/CompositeClassifier')
const WhosOnFirstClassifier = require('../classifier/WhosOnFirstClassifier')
// const AdjacencyClassifier = require('../classifier/AdjacencyClassifier')
const SubdivisionClassifier = require('../classifier/SubdivisionClassifier')
const ExclusiveCartesianSolver = require('../solver/ExclusiveCartesianSolver')
const LeadingAreaDeclassifier = require('../solver/LeadingAreaDeclassifier')
const MultiStreetSolver = require('../solver/MultiStreetSolver')
Expand Down Expand Up @@ -71,6 +72,7 @@ class AddressParser extends Parser {
new ChainClassifier(),
new PlaceClassifier(),
new WhosOnFirstClassifier(),
new SubdivisionClassifier(),

// composite classifiers
new CompositeClassifier(require('../classifier/scheme/person')),
Expand Down
2 changes: 2 additions & 0 deletions resources/pelias/dictionaries/libpostal/en/stopwords.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
!in
Comment thread
missinglink marked this conversation as resolved.
!a
6 changes: 6 additions & 0 deletions test/address.esp.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@ const testcase = (test, common) => {
{ postcode: '08011' }, { locality: 'Barcelona' }
])

// note: the desired behavior here is to not include the 'B' in the housenumber
// as it is more likely to be part of the administrative unit.
assert('Calle Principal 20 B', [
{ street: 'Calle Principal' }, { housenumber: '20' }
])

assert('Calle Principal 20 Barcelona', [
{ street: 'Calle Principal' }, { housenumber: '20' },
{ locality: 'Barcelona' }
Expand Down
6 changes: 6 additions & 0 deletions test/address.fra.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,12 @@ const testcase = (test, common) => {
{ locality: 'Châtillon' },
{ country: 'France' }
])
assert(`1 bis Rue Ballainvilliers 63000 Clermont-Ferrand`, [
{ housenumber: '1 bis' },
{ street: 'Rue Ballainvilliers' },
{ postcode: '63000' },
{ locality: 'Clermont-Ferrand' }
])
}

module.exports.all = (tape, common) => {
Expand Down
17 changes: 15 additions & 2 deletions test/address.usa.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -293,8 +293,21 @@ const testcase = (test, common) => {

// https://github.com/pelias/parser/pull/179
assert('10 A Main Street', [
{ housenumber: '10 A' },
{ street: 'Main Street' }
{ housenumber: '10' },
{ street: 'A Main Street' }
])

// https://github.com/pelias/parser/issues/191
assert('Indianapolis, IN', [
{ locality: 'Indianapolis' },
{ region: 'IN' }
])
assert('1441 Brown Ave, Whiting, IN 46394', [
{ housenumber: '1441' },
{ street: 'Brown Ave' },
{ locality: 'Whiting' },
{ region: 'IN' },
{ postcode: '46394' }
])
}

Expand Down