Skip to content
This repository was archived by the owner on May 5, 2021. It is now read-only.

Commit 08e6202

Browse files
2782 optimize person similarity duplication check (SORMAS-Foundation#3270)
* SORMAS-Foundation#2782 - Optimize person similarity/duplication check * SORMAS-Foundation#2782 - Optimize person similarity/duplication check * SORMAS-Foundation#2782 - Optimize person similarity/duplication check * SORMAS-Foundation#2782 - Optimize person similarity/duplication check * SORMAS-Foundation#2782 - Optimize person similarity/duplication check
1 parent 7e958af commit 08e6202

14 files changed

Lines changed: 147 additions & 55 deletions

File tree

sormas-api/src/main/java/de/symeda/sormas/api/person/PersonFacade.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@ public interface PersonFacade {
5757
*/
5858
List<PersonNameDto> getMatchingNameDtos(UserReferenceDto user, PersonSimilarityCriteria criteria);
5959

60+
/**
 * Existence-only counterpart of {@link #getMatchingNameDtos(UserReferenceDto, PersonSimilarityCriteria)}:
 * reports whether any person matches the given similarity criteria instead of returning the matches.
 *
 * @param userRef
 *            reference to the user on whose behalf the check is made
 * @param criteria
 *            the similarity criteria to match persons against
 * @return {@code true} if at least one matching person was found
 */
boolean checkMatchingNameInDatabase(UserReferenceDto userRef, PersonSimilarityCriteria criteria);
61+
6062
List<SimilarPersonDto> getSimilarPersonsByUuids(List<String> personUuids);
6163

6264
Boolean isValidPersonUuid(String personUuid);

sormas-backend/src/main/java/de/symeda/sormas/backend/ExtendedPostgreSQL94Dialect.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,21 @@
33
import java.sql.Types;
44

55
import org.hibernate.dialect.PostgreSQL94Dialect;
6+
import org.hibernate.dialect.function.SQLFunctionTemplate;
67
import org.hibernate.dialect.function.StandardSQLFunction;
8+
import org.hibernate.type.StandardBasicTypes;
79

810
import com.vladmihalcea.hibernate.type.json.JsonStringType;
911

1012
public class ExtendedPostgreSQL94Dialect extends PostgreSQL94Dialect {
1113

14+
public final static String SIMILARITY_OPERATOR = "similarity_operator";
15+
1216
public ExtendedPostgreSQL94Dialect() {
1317
super();
1418
// needed because of hibernate bug: https://hibernate.atlassian.net/browse/HHH-11938
1519
registerFunction("regexp_replace", new StandardSQLFunction("regexp_replace"));
1620
registerHibernateType(Types.OTHER, JsonStringType.class.getName());
21+
registerFunction(SIMILARITY_OPERATOR, new SQLFunctionTemplate(StandardBasicTypes.BOOLEAN, "?1 % ?2"));
1722
}
1823
}

sormas-backend/src/main/java/de/symeda/sormas/backend/caze/CaseService.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -727,6 +727,10 @@ public Predicate createActiveCasesFilter(CriteriaBuilder cb, Root<Case> root) {
727727
return cb.and(cb.isFalse(root.get(Case.ARCHIVED)), cb.isFalse(root.get(Case.DELETED)));
728728
}
729729

730+
public Predicate createActiveCasesFilter(CriteriaBuilder cb, Join<?, Case> join) {
731+
return cb.and(cb.isFalse(join.get(Case.ARCHIVED)), cb.isFalse(join.get(Case.DELETED)));
732+
}
733+
730734
/**
731735
* Creates a default filter that should be used as the basis of queries that do not use {@link CaseCriteria}.
732736
* This essentially removes {@link CoreAdo#deleted} cases from the queries.

sormas-backend/src/main/java/de/symeda/sormas/backend/caze/caseimport/CaseImportFacadeEjb.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -654,7 +654,7 @@ private boolean isPersonSimilarToExisting(PersonDto referencePerson) {
654654
.passportNumber(referencePerson.getPassportNumber())
655655
.nationalHealthId(referencePerson.getNationalHealthId());
656656

657-
return personFacade.getMatchingNameDtos(userFacade.getCurrentUser().toReference(), criteria).stream().findAny().isPresent();
657+
return personFacade.checkMatchingNameInDatabase(userFacade.getCurrentUser().toReference(), criteria);
658658
}
659659

660660
protected String buildEntityProperty(String[] entityPropertyPath) {

sormas-backend/src/main/java/de/symeda/sormas/backend/contact/ContactService.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1160,6 +1160,13 @@ public Predicate createActiveContactsFilter(CriteriaBuilder cb, Root<Contact> ro
11601160
return cb.and(cb.or(cb.isNull(root.get(Contact.CAZE)), cb.isFalse(caze.get(Case.ARCHIVED))), cb.isFalse(root.get(Contact.DELETED)));
11611161
}
11621162

1163+
public Predicate createActiveContactsFilter(CriteriaBuilder cb, Join<?, Contact> contactJoin) {
1164+
1165+
Join<Contact, Case> caze = contactJoin.join(Contact.CAZE, JoinType.LEFT);
1166+
return cb
1167+
.and(cb.or(cb.isNull(contactJoin.get(Contact.CAZE)), cb.isFalse(caze.get(Case.ARCHIVED))), cb.isFalse(contactJoin.get(Contact.DELETED)));
1168+
}
1169+
11631170
/**
11641171
* Creates a default filter that should be used as the basis of queries that do
11651172
* not use {@link ContactCriteria}. This essentially removes

sormas-backend/src/main/java/de/symeda/sormas/backend/event/EventParticipantService.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,12 @@ public Predicate createActiveEventParticipantsFilter(CriteriaBuilder cb, Root<Ev
181181
return cb.and(cb.isFalse(event.get(Event.ARCHIVED)), cb.isFalse(event.get(Event.DELETED)));
182182
}
183183

184+
public Predicate createActiveEventParticipantsFilter(CriteriaBuilder cb, Join<?, EventParticipant> eventParticipantJoin) {
185+
186+
Join<EventParticipant, Event> event = eventParticipantJoin.join(EventParticipant.EVENT, JoinType.LEFT);
187+
return cb.and(cb.isFalse(event.get(Event.ARCHIVED)), cb.isFalse(event.get(Event.DELETED)));
188+
}
189+
184190
/**
185191
* @see /sormas-backend/doc/UserDataAccess.md
186192
*/

sormas-backend/src/main/java/de/symeda/sormas/backend/person/Person.java

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121

2222
import java.util.Date;
2323
import java.util.HashSet;
24+
import java.util.List;
2425
import java.util.Set;
2526

2627
import javax.persistence.CascadeType;
@@ -50,6 +51,7 @@
5051
import de.symeda.sormas.api.person.PresentCondition;
5152
import de.symeda.sormas.api.person.Sex;
5253
import de.symeda.sormas.api.person.SymptomJournalStatus;
54+
import de.symeda.sormas.backend.caze.Case;
5355
import de.symeda.sormas.backend.common.AbstractDomainObject;
5456
import de.symeda.sormas.backend.contact.Contact;
5557
import de.symeda.sormas.backend.event.EventParticipant;
@@ -114,6 +116,9 @@ public class Person extends AbstractDomainObject {
114116

115117
public static final String SYMPTOM_JOURNAL_STATUS = "symptomJournalStatus";
116118
public static final String EXTERNAL_ID = "externalId";
119+
public static final String PERSON_CASES = "personCases";
120+
public static final String PERSON_CONTACTS = "personContacts";
121+
public static final String PERSON_EVENT_PARTICIPANTS = "personEventParticipants";
117122

118123
private String firstName;
119124
private String lastName;
@@ -176,6 +181,10 @@ public class Person extends AbstractDomainObject {
176181
private Set<EventParticipant> eventParticipants = new HashSet<>();
177182
private Set<Contact> contacts = new HashSet<>();
178183

184+
private List<Case> personCases;
185+
private List<Contact> personContacts;
186+
private List<EventParticipant> personEventParticipants;
187+
179188
@Column(nullable = false, length = COLUMN_LENGTH_DEFAULT)
180189
public String getFirstName() {
181190
return firstName;
@@ -615,6 +624,33 @@ public Set<EventParticipant> getEventParticipants() {
615624
return eventParticipants;
616625
}
617626

627+
@OneToMany(mappedBy = Case.PERSON, fetch = FetchType.LAZY)
628+
public List<Case> getPersonCases() {
629+
return personCases;
630+
}
631+
632+
/**
 * Replaces the list returned by {@link #getPersonCases()}.
 *
 * @param personCases
 *            the new list; the reference is stored as-is, without copying
 */
public void setPersonCases(List<Case> personCases) {
633+
this.personCases = personCases;
634+
}
635+
636+
@OneToMany(mappedBy = Contact.PERSON, fetch = FetchType.LAZY)
637+
public List<Contact> getPersonContacts() {
638+
return personContacts;
639+
}
640+
641+
/**
 * Replaces the list returned by {@link #getPersonContacts()}.
 *
 * @param personContacts
 *            the new list; the reference is stored as-is, without copying
 */
public void setPersonContacts(List<Contact> personContacts) {
642+
this.personContacts = personContacts;
643+
}
644+
645+
@OneToMany(mappedBy = EventParticipant.PERSON, fetch = FetchType.LAZY)
646+
public List<EventParticipant> getPersonEventParticipants() {
647+
return personEventParticipants;
648+
}
649+
650+
/**
 * Replaces the list returned by {@link #getPersonEventParticipants()}.
 *
 * @param personEventParticipants
 *            the new list; the reference is stored as-is, without copying
 */
public void setPersonEventParticipants(List<EventParticipant> personEventParticipants) {
651+
this.personEventParticipants = personEventParticipants;
652+
}
653+
618654
public void setContacts(Set<Contact> contacts) {
619655
this.contacts = contacts;
620656
}

sormas-backend/src/main/java/de/symeda/sormas/backend/person/PersonFacadeEjb.java

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,18 @@ public List<PersonNameDto> getMatchingNameDtos(UserReferenceDto userRef, PersonS
139139
return Collections.emptyList();
140140
}
141141

142-
return new ArrayList<>(personService.getMatchingNameDtos(user, criteria));
142+
return new ArrayList<>(personService.getMatchingNameDtos(criteria, null));
143+
}
144+
145+
@Override
146+
public boolean checkMatchingNameInDatabase(UserReferenceDto userRef, PersonSimilarityCriteria criteria) {
147+
148+
User user = userService.getByReferenceDto(userRef);
149+
if (user == null) {
150+
return false;
151+
}
152+
153+
return personService.getMatchingNameDtos(criteria, 1).size() > 0;
143154
}
144155

145156
@Override
@@ -396,7 +407,9 @@ public boolean setSymptomJournalStatus(String personUuid, SymptomJournalStatus s
396407
*/
397408
private void cleanUp(Person person) {
398409

399-
if (person.getPresentCondition() == null || person.getPresentCondition() == PresentCondition.ALIVE || person.getPresentCondition() == PresentCondition.UNKNOWN) {
410+
if (person.getPresentCondition() == null
411+
|| person.getPresentCondition() == PresentCondition.ALIVE
412+
|| person.getPresentCondition() == PresentCondition.UNKNOWN) {
400413
person.setDeathDate(null);
401414
person.setCauseOfDeath(null);
402415
person.setCauseOfDeathDisease(null);

sormas-backend/src/main/java/de/symeda/sormas/backend/person/PersonService.java

Lines changed: 43 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -17,19 +17,21 @@
1717
*******************************************************************************/
1818
package de.symeda.sormas.backend.person;
1919

20+
import static de.symeda.sormas.backend.ExtendedPostgreSQL94Dialect.SIMILARITY_OPERATOR;
21+
2022
import java.sql.Timestamp;
2123
import java.util.Collections;
2224
import java.util.Comparator;
2325
import java.util.Date;
24-
import java.util.HashSet;
2526
import java.util.List;
26-
import java.util.Set;
2727
import java.util.stream.Collectors;
2828
import java.util.stream.Stream;
2929

3030
import javax.ejb.EJB;
3131
import javax.ejb.LocalBean;
3232
import javax.ejb.Stateless;
33+
import javax.persistence.Query;
34+
import javax.persistence.TypedQuery;
3335
import javax.persistence.criteria.CriteriaBuilder;
3436
import javax.persistence.criteria.CriteriaQuery;
3537
import javax.persistence.criteria.Expression;
@@ -283,64 +285,54 @@ private Predicate isInJurisdiction(CriteriaBuilder cb, CriteriaQuery<Long> cq, R
283285
return cb.or(isCaseInJurisdiction, isContactInJurisdiction, isEventParticipantInJurisdiction);
284286
}
285287

286-
public Set<PersonNameDto> getMatchingNameDtos(User user, PersonSimilarityCriteria criteria) {
288+
public List<PersonNameDto> getMatchingNameDtos(PersonSimilarityCriteria criteria, Integer limit) {
289+
290+
setSimilarityThresholdQuery();
287291

288292
CriteriaBuilder cb = em.getCriteriaBuilder();
289-
Set<PersonNameDto> persons = new HashSet<>();
293+
Predicate caseContactEventParticipantLinkPredicate;
290294

291-
// Persons of active cases
292-
CriteriaQuery<PersonNameDto> casePersonsQuery = cb.createQuery(PersonNameDto.class);
293-
Root<Case> casePersonsRoot = casePersonsQuery.from(Case.class);
294-
Join<Case, Person> casePersonsJoin = casePersonsRoot.join(Case.PERSON, JoinType.LEFT);
295+
CriteriaQuery<PersonNameDto> personQuery = cb.createQuery(PersonNameDto.class);
296+
Root<Person> personRoot = personQuery.from(Person.class);
297+
Join<Person, Case> personCaseJoin = personRoot.join(Person.PERSON_CASES, JoinType.LEFT);
298+
Join<Person, Contact> personContactJoin = personRoot.join(Person.PERSON_CONTACTS, JoinType.LEFT);
299+
Join<Person, EventParticipant> personEventParticipantJoin = personRoot.join(Person.PERSON_EVENT_PARTICIPANTS, JoinType.LEFT);
295300

296-
casePersonsQuery.multiselect(casePersonsJoin.get(Person.FIRST_NAME), casePersonsJoin.get(Person.LAST_NAME), casePersonsJoin.get(Person.UUID));
301+
personQuery.multiselect(personRoot.get(Person.FIRST_NAME), personRoot.get(Person.LAST_NAME), personRoot.get(Person.UUID));
297302

298-
Predicate casePersonsFilter = buildSimilarityCriteriaFilter(criteria, cb, casePersonsJoin);
299-
Predicate activeCasesFilter = caseService.createActiveCasesFilter(cb, casePersonsRoot);
300-
Predicate caseUserFilter = caseService.createUserFilter(cb, casePersonsQuery, casePersonsRoot);
301-
casePersonsQuery.where(
302-
caseUserFilter != null ? and(cb, casePersonsFilter, activeCasesFilter, caseUserFilter) : and(cb, casePersonsFilter, activeCasesFilter));
303-
casePersonsQuery.distinct(true);
304-
persons.addAll(em.createQuery(casePersonsQuery).getResultList());
303+
// Persons of active cases
304+
Predicate personSimilarityFilter = buildSimilarityCriteriaFilter(criteria, cb, personRoot);
305+
Predicate activeCasesFilter = caseService.createActiveCasesFilter(cb, personCaseJoin);
306+
Predicate caseUserFilter = caseService.createUserFilter(cb, personQuery, personCaseJoin);
307+
Predicate personCasePredicate = and(cb, personCaseJoin.get(Case.ID).isNotNull(), activeCasesFilter, caseUserFilter);
305308

306309
// Persons of active contacts
307-
CriteriaQuery<PersonNameDto> contactPersonsQuery = cb.createQuery(PersonNameDto.class);
308-
Root<Contact> contactPersonsRoot = contactPersonsQuery.from(Contact.class);
309-
Join<Contact, Person> contactPersonsJoin = contactPersonsRoot.join(Contact.PERSON, JoinType.LEFT);
310-
311-
contactPersonsQuery
312-
.multiselect(contactPersonsJoin.get(Person.FIRST_NAME), contactPersonsJoin.get(Person.LAST_NAME), contactPersonsJoin.get(Person.UUID));
313-
314-
Predicate contactPersonsFilter = buildSimilarityCriteriaFilter(criteria, cb, contactPersonsRoot.join(Contact.PERSON, JoinType.LEFT));
315-
Predicate activeContactsFilter = contactService.createActiveContactsFilter(cb, contactPersonsRoot);
316-
Predicate contactUserFilter = contactService.createUserFilter(cb, contactPersonsQuery, contactPersonsRoot);
317-
contactPersonsQuery.where(
318-
contactPersonsFilter != null
319-
? and(cb, contactPersonsFilter, activeContactsFilter, contactUserFilter)
320-
: and(cb, contactPersonsFilter, activeContactsFilter));
321-
contactPersonsQuery.distinct(true);
322-
persons.addAll(em.createQuery(contactPersonsQuery).getResultList());
310+
Predicate activeContactsFilter = contactService.createActiveContactsFilter(cb, personContactJoin);
311+
Predicate contactUserFilter = contactService.createUserFilter(cb, personQuery, personContactJoin);
312+
Predicate personContactPredicate = and(cb, personContactJoin.get(Contact.ID).isNotNull(), contactUserFilter, activeContactsFilter);
323313

324314
// Persons of event participants in active events
325-
CriteriaQuery<PersonNameDto> eventPersonsQuery = cb.createQuery(PersonNameDto.class);
326-
Root<EventParticipant> eventPersonsRoot = eventPersonsQuery.from(EventParticipant.class);
327-
Join<EventParticipant, Person> eventPersonsJoin = eventPersonsRoot.join(EventParticipant.PERSON, JoinType.LEFT);
328-
329-
eventPersonsQuery
330-
.multiselect(eventPersonsJoin.get(Person.FIRST_NAME), eventPersonsJoin.get(Person.LAST_NAME), eventPersonsJoin.get(Person.UUID));
331-
332-
Predicate eventParticipantPersonsFilter =
333-
buildSimilarityCriteriaFilter(criteria, cb, eventPersonsRoot.join(EventParticipant.PERSON, JoinType.LEFT));
334-
Predicate activeEventParticipantsFilter = eventParticipantService.createActiveEventParticipantsFilter(cb, eventPersonsRoot);
335-
Predicate eventParticipantUserFilter = eventParticipantService.createUserFilter(cb, eventPersonsQuery, eventPersonsRoot);
336-
eventPersonsQuery.where(
337-
eventParticipantUserFilter != null
338-
? and(cb, eventParticipantPersonsFilter, activeEventParticipantsFilter, eventParticipantUserFilter)
339-
: and(cb, eventParticipantPersonsFilter, activeEventParticipantsFilter));
340-
eventPersonsQuery.distinct(true);
341-
persons.addAll(em.createQuery(eventPersonsQuery).getResultList());
315+
Predicate activeEventParticipantsFilter = eventParticipantService.createActiveEventParticipantsFilter(cb, personEventParticipantJoin);
316+
Predicate eventParticipantUserFilter = eventParticipantService.createUserFilter(cb, personQuery, personEventParticipantJoin);
317+
Predicate personEventParticipantPredicate =
318+
and(cb, personEventParticipantJoin.get(EventParticipant.ID).isNotNull(), activeEventParticipantsFilter, eventParticipantUserFilter);
319+
320+
caseContactEventParticipantLinkPredicate = or(cb, personCasePredicate, personContactPredicate, personEventParticipantPredicate);
321+
322+
personQuery.where(and(cb, personSimilarityFilter, caseContactEventParticipantLinkPredicate));
323+
personQuery.distinct(true);
324+
325+
TypedQuery<PersonNameDto> query = em.createQuery(personQuery);
326+
if (limit != null) {
327+
query.setMaxResults(limit);
328+
}
329+
return query.getResultList();
330+
}
342331

343-
return persons;
332+
public void setSimilarityThresholdQuery() {
333+
double nameSimilarityThreshold = configFacade.getNameSimilarityThreshold();
334+
Query q = em.createNativeQuery("select set_limit(" + nameSimilarityThreshold + ")");
335+
q.getSingleResult();
344336
}
345337

346338
public List<Person> getDeathsBetween(Date fromDate, Date toDate, District district, Disease disease, User user) {
@@ -411,8 +403,7 @@ public Predicate buildSimilarityCriteriaFilter(PersonSimilarityCriteria criteria
411403

412404
String name = criteria.getFirstName() + " " + criteria.getLastName();
413405

414-
double nameSimilarityThreshold = configFacade.getNameSimilarityThreshold();
415-
filter = and(cb, filter, cb.gt(cb.function("similarity", double.class, nameExpr, cb.literal(name)), nameSimilarityThreshold));
406+
filter = and(cb, filter, cb.isTrue(cb.function(SIMILARITY_OPERATOR, boolean.class, nameExpr, cb.literal(name))));
416407
}
417408

418409
if (criteria.getSex() != null) {

sormas-backend/src/main/resources/sql/sormas_schema.sql

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5494,4 +5494,8 @@ ALTER TABLE users ADD COLUMN hasConsentedToGdpr boolean default false;
54945494
ALTER TABLE users_history ADD COLUMN hasConsentedToGdpr boolean default false;
54955495
INSERT INTO schema_version (version_number, comment) VALUES (267, 'Add gdpr popup to user');
54965496

5497+
--2020-10-22 Optimize person similarity/duplication check
-- Adds a GiST index over the expression "firstName || ' ' || lastName" of the person table,
-- using the gist_trgm_ops operator class, and records schema version 268.
-- NOTE(review): gist_trgm_ops is presumably provided by the pg_trgm extension - confirm it is
-- created by an earlier schema step.
-- NOTE(review): an expression index is only considered when the query uses the same expression -
-- verify against the name expression built in PersonService.buildSimilarityCriteriaFilter.
5498+
CREATE INDEX similarity_index
5499+
ON person using gist ((firstName || ' ' || lastName) gist_trgm_ops);
5500+
INSERT INTO schema_version (version_number, comment) VALUES (268, 'Optimize person similarity/duplication check');
54975501
-- *** Insert new sql commands BEFORE this line ***

0 commit comments

Comments
 (0)