MDM Tutorial
If you'd like to quickly try out Smile CDR MDM on a fresh install of Smile CDR, the following steps show how to load a csv of patient data into Smile CDR and run MDM matching on it.
The following MDM JSON Rules and ETL Importer Mapping Script are configured to work with the test data provided by the IntraHealth Client Registry.
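Create a user named mdm-quickstart, choose a password, and grant them the permissions needed to load data and invoke MDM operations.
Then configure the MDM module with the following MDM JSON rules:
{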
"version": "1",
"mdmTypes": ["Patient", "Practitioner"],
"candidateSearchParams": [{
"resourceType": "Patient",
"searchParams": ["phone"]
}, {
"resourceType": "Patient",
"searchParams": ["birthdate"]
}
],
"candidateFilterSearchParams": [],
"matchFields": [
{
"name": "birthday",
"resourceType": "Patient",
"resourcePath": "birthDate",
"matcher": {
"algorithm": "STRING"
}
},
{
"name": "phone",
"resourceType": "Patient",
"resourcePath": "telecom.value",
"matcher": {
"algorithm": "STRING"
}
},
{
"name": "firstname-meta",
"resourceType": "Patient",
"resourcePath": "name.given",
"matcher": {
"algorithm": "METAPHONE"
}
},
{
"name": "lastname-meta",
"resourceType": "Patient",
"resourcePath": "name.family",
"matcher": {
"algorithm": "METAPHONE"
}
}
],
"matchResultMap": {
"firstname-meta,lastname-meta,birthday": "MATCH",
"firstname-meta,lastname-meta,phone": "MATCH"
}
}
And the following default survivorship rules
/**
 * Default survivorship rule applied when a target record is linked to a
 * golden record.
 *
 * Wraps both records in an MdmHelper and calls replaceAll() on it.
 * NOTE(review): presumably replaceAll() overwrites every golden-record field
 * with the target's value, not just the name - confirm against the MdmHelper
 * documentation.
 *
 * @param targetRec          the incoming (source) record
 * @param goldenRec          the golden record being updated
 * @param transactionContext context object forwarded unchanged to MdmHelper
 */
function mdmApplySurvivorshipRules(targetRec, goldenRec, transactionContext) {
  new MdmHelper(Fhir.getContext(), targetRec, goldenRec, transactionContext).replaceAll();
}
/**
 * Default survivorship rule applied when one golden record is merged into
 * another.
 *
 * Wraps both golden records in an MdmHelper and calls mergeAll() on it.
 * NOTE(review): presumably mergeAll() merges every field of the "from" record
 * into the "to" record - confirm against the MdmHelper documentation.
 *
 * @param fromGoldenRec      the golden record being merged away
 * @param toGoldenRec        the golden record that survives the merge
 * @param transactionContext context object forwarded unchanged to MdmHelper
 */
function mdmApplySurvivorshipRulesOnMergeGoldenResources(fromGoldenRec, toGoldenRec, transactionContext) {
  new MdmHelper(Fhir.getContext(), fromGoldenRec, toGoldenRec, transactionContext).mergeAll();
}
Save the MDM Module settings and start the MDM module.
Add an ETL Importer module, set the worker thread count to 1, set the Storage dependency to the storage module, then create a Mapping Script to parse your CSV of patient data.
// CSV Mapping function - Expects input in the format:
// rec_id,sex,date_of_birth,given_name,surname,phone_number,uganda_nin,art_number
//
// Called once per CSV row. Builds a Patient resource from the row and submits
// it as an update inside a FHIR transaction.
//
// inputMap - the row's values keyed by column name
// context  - import context; only context.filename is read (for logging)
function handleEtlImportRow(inputMap, context) {
  Log.info("Processing CSV row from file: " + context.filename);

  // Returns the trimmed text of a column, or '' when the column is absent,
  // so a short or ragged row does not abort the import with a TypeError.
  function column(name) {
    const raw = inputMap[name];
    return (raw === null || raw === undefined) ? '' : String(raw).trim();
  }

  // Create a patient. The id is trimmed so it agrees with the rec identifier.
  const patient = ResourceBuilder.build('Patient');
  const recId = column('rec_id');
  patient.id = recId;

  // Identifiers
  patient.identifier[0].system = 'http://smilecdr.com/rec';
  patient.identifier[0].value = recId;
  patient.identifier[1].system = 'http://smilecdr.com/uganda_nin';
  patient.identifier[1].value = column('uganda_nin');
  patient.identifier[2].system = 'http://smilecdr.com/art_number';
  patient.identifier[2].value = column('art_number');

  // Name
  patient.name[0].family = column('surname');
  patient.name[0].given[0] = column('given_name');

  // DOB: the CSV carries YYYYMMDD. Only exactly eight digits (after trimming)
  // with a month of 01-12 and a day of 01-31 are converted to YYYY-MM-DD;
  // anything else is skipped rather than stored as an invalid date.
  const dob = column('date_of_birth');
  if (/^\d{8}$/.test(dob)) {
    const year = dob.substring(0, 4);
    const month = dob.substring(4, 6);
    const day = dob.substring(6, 8);
    const monthNum = parseInt(month, 10);
    const dayNum = parseInt(day, 10);
    if (monthNum >= 1 && monthNum <= 12 && dayNum >= 1 && dayNum <= 31) {
      patient.birthDate = year + "-" + month + "-" + day;
    }
  }

  // Gender: only the codes 'm' and 'f' are mapped; other values leave it unset.
  const sex = column('sex');
  if (sex === 'm') {
    patient.gender = 'male';
  } else if (sex === 'f') {
    patient.gender = 'female';
  }

  // Phone: only added when the row actually has a number.
  const phone = column('phone_number');
  if (phone) {
    patient.telecom[0].system = 'phone';
    patient.telecom[0].value = phone;
  }

  // Build a transaction and process it
  const transaction = TransactionBuilder.newTransactionBuilder();
  transaction.update(patient);
  Fhir.transaction(transaction);
}
Save the ETL Importer module settings and start the Module.
Shut down Smile CDR.
Enable the MDM Troubleshooting log by modifying the contents of classes/logback.xml
Start Smile CDR.
Load the patient data csv into Smile CDR with the following command
./smileutil upload-csv-bulk-import-file -f /path/to/patient_data.csv -b "mdm-quickstart:<PASSWORD>" -u "http://localhost:9000" -i etl_importer
Run tail -f log/mdm-troubleshooting.log
to watch the data being analyzed by MDM. Once this log has stopped adding lines, you can move on to analyzing the output.
For my run, I wrote a little program to analyze the output. My program called http://localhost:9000/mdm/mdm/query-links
and then counted links by personId to measure the false positive and false negative rates. This first run produced a false positive rate of 0% and a false negative (missed matches) rate of 37%, which is not very good.
By adding just a couple more matchers, I was able to get the False Negative rate down to just 3.7%.
Here is the configuration I used to achieve the improved matching rate:
{
"version": "1",
"mdmTypes": ["Patient", "Practitioner"],
"candidateSearchParams": [
{
"resourceType": "Patient",
"searchParams": ["phone"]
},
{
"resourceType": "Patient",
"searchParams": ["birthdate"]
}
],
"candidateFilterSearchParams": [],
"matchFields": [
{
"name": "birthday",
"resourceType": "Patient",
"resourcePath": "birthDate",
"matcher": {
"algorithm": "STRING"
}
},
{
"name": "phone",
"resourceType": "Patient",
"resourcePath": "telecom.value",
"matcher": {
"algorithm": "STRING"
}
},
{
"name": "firstname-meta",
"resourceType": "Patient",
"resourcePath": "name.given",
"matcher": {
"algorithm": "METAPHONE"
}
},
{
"name": "lastname-meta",
"resourceType": "Patient",
"resourcePath": "name.family",
"matcher": {
"algorithm": "METAPHONE"
}
},
{
"name": "firstname-jaro",
"resourceType": "Patient",
"resourcePath": "name.given",
"similarity": {
"algorithm": "JARO_WINKLER",
"matchThreshold": 0.80
}
},
{
"name": "lastname-jaro",
"resourceType": "Patient",
"resourcePath": "name.family",
"similarity": {
"algorithm": "JARO_WINKLER",
"matchThreshold": 0.80
}
}
],
"matchResultMap": {
"firstname-meta,lastname-meta,birthday": "MATCH",
"firstname-meta,lastname-meta,phone": "MATCH",
"firstname-jaro,lastname-jaro,birthday": "POSSIBLE_MATCH",
"firstname-jaro,lastname-jaro,phone": "POSSIBLE_MATCH",
"lastname-jaro,phone,birthday": "POSSIBLE_MATCH",
"firstname-jaro,phone,birthday": "POSSIBLE_MATCH"
}
}
Between runs, you can use the $mdm-clear
operation to expunge MDM link data and then call the $mdm-submit
operation to submit all the loaded Patient resources to MDM again. Remember that these calls need to be authenticated using the username mdm-quickstart
and the password you previously chose.
You can then call /mdm/mdm/query-links
to pull the new links and analyze the false positive and false negative rates. You should iterate through $mdm-clear
, $mdm-submit
and /mdm/mdm/query-links
until you achieve sufficiently low false positive and false negative rates.
You are about to leave the Smile Digital Health documentation and navigate to the Open Source HAPI-FHIR Documentation.