001/*-
002 * #%L
003 * HAPI FHIR JPA Model
004 * %%
005 * Copyright (C) 2014 - 2024 Smile CDR, Inc.
006 * %%
007 * Licensed under the Apache License, Version 2.0 (the "License");
008 * you may not use this file except in compliance with the License.
009 * You may obtain a copy of the License at
010 *
011 *      http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing, software
014 * distributed under the License is distributed on an "AS IS" BASIS,
015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016 * See the License for the specific language governing permissions and
017 * limitations under the License.
018 * #L%
019 */
020package ca.uhn.fhir.jpa.model.search;
021
022import org.hibernate.search.engine.backend.document.DocumentElement;
023import org.hibernate.search.engine.backend.document.model.dsl.IndexSchemaElement;
024import org.hibernate.search.engine.backend.document.model.dsl.IndexSchemaObjectField;
025import org.hibernate.search.engine.backend.types.Aggregable;
026import org.hibernate.search.engine.backend.types.ObjectStructure;
027import org.hibernate.search.engine.backend.types.Projectable;
028import org.hibernate.search.engine.backend.types.Searchable;
029import org.hibernate.search.engine.backend.types.Sortable;
030import org.hibernate.search.engine.backend.types.dsl.IndexFieldTypeFactory;
031import org.hibernate.search.engine.backend.types.dsl.StandardIndexFieldTypeOptionsStep;
032import org.hibernate.search.engine.backend.types.dsl.StringIndexFieldTypeOptionsStep;
033import org.hibernate.search.mapper.pojo.bridge.PropertyBridge;
034import org.hibernate.search.mapper.pojo.bridge.binding.PropertyBindingContext;
035import org.hibernate.search.mapper.pojo.bridge.mapping.programmatic.PropertyBinder;
036import org.hibernate.search.mapper.pojo.bridge.runtime.PropertyBridgeWriteContext;
037import org.slf4j.Logger;
038import org.slf4j.LoggerFactory;
039
040import java.time.Instant;
041
042import static ca.uhn.fhir.jpa.model.search.HSearchIndexWriter.IDX_STRING_EXACT;
043import static ca.uhn.fhir.jpa.model.search.HSearchIndexWriter.IDX_STRING_LOWER;
044import static ca.uhn.fhir.jpa.model.search.HSearchIndexWriter.IDX_STRING_NORMALIZED;
045import static ca.uhn.fhir.jpa.model.search.HSearchIndexWriter.IDX_STRING_TEXT;
046import static ca.uhn.fhir.jpa.model.search.HSearchIndexWriter.NUMBER_VALUE;
047import static ca.uhn.fhir.jpa.model.search.HSearchIndexWriter.QTY_CODE;
048import static ca.uhn.fhir.jpa.model.search.HSearchIndexWriter.QTY_CODE_NORM;
049import static ca.uhn.fhir.jpa.model.search.HSearchIndexWriter.QTY_SYSTEM;
050import static ca.uhn.fhir.jpa.model.search.HSearchIndexWriter.QTY_VALUE;
051import static ca.uhn.fhir.jpa.model.search.HSearchIndexWriter.QTY_VALUE_NORM;
052import static ca.uhn.fhir.jpa.model.search.HSearchIndexWriter.URI_VALUE;
053
054/**
055 * Allows hibernate search to index
056 * <p>
057 * CodeableConcept.text
058 * Coding.display
059 * Identifier.type.text
060 */
061public class SearchParamTextPropertyBinder implements PropertyBinder, PropertyBridge<ExtendedHSearchIndexData> {
062        private static final Logger ourLog = LoggerFactory.getLogger(SearchParamTextPropertyBinder.class);
063
064        public static final String SEARCH_PARAM_TEXT_PREFIX = "text-";
065        public static final String LOWERCASE_ASCIIFOLDING_NORMALIZER = "lowercaseAsciifoldingNormalizer";
066
067        @Override
068        public void bind(PropertyBindingContext thePropertyBindingContext) {
069                // TODO Is it safe to use object identity of the Map to track dirty?
070                // N.B. GGG I would hazard that it is not, we could potentially use Version of the resource.
071                thePropertyBindingContext.dependencies().use("mySearchParamStrings").use("mySearchParamQuantities");
072
073                defineIndexingTemplate(thePropertyBindingContext);
074
075                thePropertyBindingContext.bridge(ExtendedHSearchIndexData.class, this);
076        }
077
078        private void defineIndexingTemplate(PropertyBindingContext thePropertyBindingContext) {
079                IndexSchemaElement indexSchemaElement = thePropertyBindingContext.indexSchemaElement();
080
081                // In order to support dynamic fields, we have to use field templates. We _must_ define the template at
082                // bootstrap time and cannot
083                // create them adhoc.
084                // https://docs.jboss.org/hibernate/search/6.0/reference/en-US/html_single/#mapper-orm-bridge-index-field-dsl-dynamic
085                // I _think_ im doing the right thing here by indicating that everything matching this template uses this
086                // analyzer.
087                IndexFieldTypeFactory indexFieldTypeFactory = thePropertyBindingContext.typeFactory();
088                // TODO mb Once Ken finishes extracting a common base, we can share these constants with
089                // HapiElasticsearchAnalysisConfigurer and HapiLuceneAnalysisConfigurer
090                StringIndexFieldTypeOptionsStep<?> standardAnalyzer =
091                                indexFieldTypeFactory.asString().analyzer("standardAnalyzer").projectable(Projectable.NO);
092
093                StringIndexFieldTypeOptionsStep<?> lowerCaseNormalizer = indexFieldTypeFactory
094                                .asString()
095                                .normalizer(LOWERCASE_ASCIIFOLDING_NORMALIZER)
096                                .sortable(Sortable.YES)
097                                .projectable(Projectable.YES);
098
099                StringIndexFieldTypeOptionsStep<?> exactAnalyzer = indexFieldTypeFactory
100                                .asString()
101                                .analyzer("exactAnalyzer") // default max-length is 256.  Is that enough for code system uris?
102                                .projectable(Projectable.NO);
103
104                StringIndexFieldTypeOptionsStep<?> normStringAnalyzer =
105                                indexFieldTypeFactory.asString().analyzer("normStringAnalyzer").projectable(Projectable.NO);
106
107                StringIndexFieldTypeOptionsStep<?> keywordFieldType = indexFieldTypeFactory
108                                .asString()
109                                // TODO JB: may have to add normalizer to support case insensitive searches depending on token flags
110                                .projectable(Projectable.NO)
111                                .sortable(Sortable.YES)
112                                .aggregable(Aggregable.YES);
113
114                StandardIndexFieldTypeOptionsStep<?, Instant> dateTimeFieldType =
115                                indexFieldTypeFactory.asInstant().projectable(Projectable.NO).sortable(Sortable.YES);
116
117                StandardIndexFieldTypeOptionsStep<?, Integer> dateTimeOrdinalFieldType =
118                                indexFieldTypeFactory.asInteger().projectable(Projectable.NO).sortable(Sortable.YES);
119
120                StandardIndexFieldTypeOptionsStep<?, Double> bigDecimalFieldType =
121                                indexFieldTypeFactory.asDouble().projectable(Projectable.NO).sortable(Sortable.YES);
122
123                StringIndexFieldTypeOptionsStep<?> forcedIdType =
124                                indexFieldTypeFactory.asString().projectable(Projectable.YES).aggregable(Aggregable.NO);
125
126                // type to store payload fields that do not participate in search, only results
127                StringIndexFieldTypeOptionsStep<?> stringStorageType = indexFieldTypeFactory
128                                .asString()
129                                .searchable(Searchable.NO)
130                                .projectable(Projectable.YES)
131                                .aggregable(Aggregable.NO);
132
133                // the old style for _text and _contains
134                indexSchemaElement
135                                .fieldTemplate("SearchParamText", standardAnalyzer)
136                                .matchingPathGlob(SEARCH_PARAM_TEXT_PREFIX + "*");
137
138                indexSchemaElement.field("myForcedId", forcedIdType).toReference();
139
140                indexSchemaElement.field("myRawResource", stringStorageType).toReference();
141
142                // The following section is a bit ugly.  We need to enforce order and dependency or the object matches will be
143                // too big.
144                {
145                        IndexSchemaObjectField spfield =
146                                        indexSchemaElement.objectField(HSearchIndexWriter.SEARCH_PARAM_ROOT, ObjectStructure.FLATTENED);
147                        spfield.toReference();
148                        IndexSchemaObjectField nestedSpField = indexSchemaElement.objectField(
149                                        HSearchIndexWriter.NESTED_SEARCH_PARAM_ROOT, ObjectStructure.FLATTENED);
150                        nestedSpField.toReference();
151
152                        // Note: the lucene/elastic independent api is hurting a bit here.
153                        // For lucene, we need a separate field for each analyzer.  So we'll add string (for :exact), and text (for
154                        // :text).
155                        // They aren't marked stored, so there's no space cost beyond the index for each.
156                        // But for elastic, we'd rather have a single field defined, with multi-field sub-fields.  The index cost is
157                        // the same,
158                        // but elastic will actually store all fields in the source document and consume disk.
159
160                        // So triplicate the storage for now. :-(
161                        String stringPathGlob = "*.string";
162                        spfield.objectFieldTemplate("stringIndex", ObjectStructure.FLATTENED)
163                                        .matchingPathGlob(stringPathGlob);
164                        spfield.fieldTemplate("string-norm", normStringAnalyzer)
165                                        .matchingPathGlob(stringPathGlob + "." + IDX_STRING_NORMALIZED)
166                                        .multiValued();
167                        spfield.fieldTemplate("string-exact", exactAnalyzer)
168                                        .matchingPathGlob(stringPathGlob + "." + IDX_STRING_EXACT)
169                                        .multiValued();
170                        spfield.fieldTemplate("string-text", standardAnalyzer)
171                                        .matchingPathGlob(stringPathGlob + "." + IDX_STRING_TEXT)
172                                        .multiValued();
173                        spfield.fieldTemplate("string-lower", lowerCaseNormalizer)
174                                        .matchingPathGlob(stringPathGlob + "." + IDX_STRING_LOWER)
175                                        .multiValued();
176
177                        nestedSpField
178                                        .objectFieldTemplate("nestedStringIndex", ObjectStructure.FLATTENED)
179                                        .matchingPathGlob(stringPathGlob);
180                        nestedSpField
181                                        .fieldTemplate("string-norm", normStringAnalyzer)
182                                        .matchingPathGlob(stringPathGlob + "." + IDX_STRING_NORMALIZED)
183                                        .multiValued();
184                        nestedSpField
185                                        .fieldTemplate("string-text", standardAnalyzer)
186                                        .matchingPathGlob(stringPathGlob + "." + IDX_STRING_TEXT)
187                                        .multiValued();
188
189                        // token
190                        // Ideally, we'd store a single code-system string and use a custom tokenizer to
191                        // generate "system|" "|code" and "system|code" tokens to support all three.
192                        // But the standard tokenizers aren't that flexible.  As second best, it would be nice to use elastic
193                        // multi-fields
194                        // to apply three different tokenizers to a single value.
195                        // Instead, just be simple and expand into three full fields for now
196                        String tokenPathGlob = "*.token";
197                        spfield.objectFieldTemplate("tokenIndex", ObjectStructure.FLATTENED).matchingPathGlob(tokenPathGlob);
198                        spfield.fieldTemplate("token-code", keywordFieldType)
199                                        .matchingPathGlob(tokenPathGlob + ".code")
200                                        .multiValued();
201                        spfield.fieldTemplate("token-code-system", keywordFieldType)
202                                        .matchingPathGlob(tokenPathGlob + ".code-system")
203                                        .multiValued();
204                        spfield.fieldTemplate("token-system", keywordFieldType)
205                                        .matchingPathGlob(tokenPathGlob + ".system")
206                                        .multiValued();
207
208                        nestedSpField
209                                        .objectFieldTemplate("nestedTokenIndex", ObjectStructure.FLATTENED)
210                                        .matchingPathGlob(tokenPathGlob);
211                        nestedSpField
212                                        .fieldTemplate("token-code", keywordFieldType)
213                                        .matchingPathGlob(tokenPathGlob + ".code")
214                                        .multiValued();
215                        nestedSpField
216                                        .fieldTemplate("token-code-system", keywordFieldType)
217                                        .matchingPathGlob(tokenPathGlob + ".code-system")
218                                        .multiValued();
219                        nestedSpField
220                                        .fieldTemplate("token-system", keywordFieldType)
221                                        .matchingPathGlob(tokenPathGlob + ".system")
222                                        .multiValued();
223
224                        // reference
225                        spfield.fieldTemplate("reference-value", keywordFieldType)
226                                        .matchingPathGlob("*.reference.value")
227                                        .multiValued();
228
229                        // uri
230                        spfield.fieldTemplate("uriValueTemplate", keywordFieldType)
231                                        .matchingPathGlob("*." + URI_VALUE)
232                                        .multiValued();
233                        nestedSpField
234                                        .fieldTemplate("uriValueTemplate", keywordFieldType)
235                                        .matchingPathGlob("*." + URI_VALUE)
236                                        .multiValued();
237
238                        // number
239                        spfield.fieldTemplate("numberValueTemplate", bigDecimalFieldType).matchingPathGlob("*." + NUMBER_VALUE);
240                        nestedSpField
241                                        .fieldTemplate("numberValueTemplate", bigDecimalFieldType)
242                                        .matchingPathGlob("*." + NUMBER_VALUE);
243
244                        // quantity
245                        String quantityPathGlob = "*.quantity";
246                        nestedSpField
247                                        .objectFieldTemplate("quantityTemplate", ObjectStructure.FLATTENED)
248                                        .matchingPathGlob(quantityPathGlob);
249                        nestedSpField
250                                        .fieldTemplate(QTY_SYSTEM, keywordFieldType)
251                                        .matchingPathGlob(quantityPathGlob + "." + QTY_SYSTEM);
252                        nestedSpField.fieldTemplate(QTY_CODE, keywordFieldType).matchingPathGlob(quantityPathGlob + "." + QTY_CODE);
253                        nestedSpField
254                                        .fieldTemplate(QTY_VALUE, bigDecimalFieldType)
255                                        .matchingPathGlob(quantityPathGlob + "." + QTY_VALUE);
256                        nestedSpField
257                                        .fieldTemplate(QTY_CODE_NORM, keywordFieldType)
258                                        .matchingPathGlob(quantityPathGlob + "." + QTY_CODE_NORM);
259                        nestedSpField
260                                        .fieldTemplate(QTY_VALUE_NORM, bigDecimalFieldType)
261                                        .matchingPathGlob(quantityPathGlob + "." + QTY_VALUE_NORM);
262
263                        // date
264                        String dateTimePathGlob = "*.dt";
265                        spfield.objectFieldTemplate("datetimeIndex", ObjectStructure.FLATTENED)
266                                        .matchingPathGlob(dateTimePathGlob);
267                        spfield.fieldTemplate("datetime-lower-ordinal", dateTimeOrdinalFieldType)
268                                        .matchingPathGlob(dateTimePathGlob + ".lower-ord")
269                                        .multiValued();
270                        spfield.fieldTemplate("datetime-lower-value", dateTimeFieldType)
271                                        .matchingPathGlob(dateTimePathGlob + ".lower")
272                                        .multiValued();
273                        spfield.fieldTemplate("datetime-upper-ordinal", dateTimeOrdinalFieldType)
274                                        .matchingPathGlob(dateTimePathGlob + ".upper-ord")
275                                        .multiValued();
276                        spfield.fieldTemplate("datetime-upper-value", dateTimeFieldType)
277                                        .matchingPathGlob(dateTimePathGlob + ".upper")
278                                        .multiValued();
279
280                        nestedSpField
281                                        .objectFieldTemplate("nestedDatetimeIndex", ObjectStructure.FLATTENED)
282                                        .matchingPathGlob(dateTimePathGlob);
283                        nestedSpField
284                                        .fieldTemplate("datetime-lower-ordinal", dateTimeOrdinalFieldType)
285                                        .matchingPathGlob(dateTimePathGlob + ".lower-ord")
286                                        .multiValued();
287                        nestedSpField
288                                        .fieldTemplate("datetime-lower-value", dateTimeFieldType)
289                                        .matchingPathGlob(dateTimePathGlob + ".lower")
290                                        .multiValued();
291                        nestedSpField
292                                        .fieldTemplate("datetime-upper-ordinal", dateTimeOrdinalFieldType)
293                                        .matchingPathGlob(dateTimePathGlob + ".upper-ord")
294                                        .multiValued();
295                        nestedSpField
296                                        .fieldTemplate("datetime-upper-value", dateTimeFieldType)
297                                        .matchingPathGlob(dateTimePathGlob + ".upper")
298                                        .multiValued();
299
300                        // last, since the globs are matched in declaration order, and * matches even nested nodes.
301                        spfield.objectFieldTemplate("spObject", ObjectStructure.FLATTENED).matchingPathGlob("*");
302
303                        // we use nested search params for the autocomplete search.
304                        nestedSpField
305                                        .objectFieldTemplate("nestedSpSubObject", ObjectStructure.FLATTENED)
306                                        .matchingPathGlob("*.*")
307                                        .multiValued();
308                        nestedSpField
309                                        .objectFieldTemplate("nestedSpObject", ObjectStructure.NESTED)
310                                        .matchingPathGlob("*")
311                                        .multiValued();
312                }
313        }
314
315        @Override
316        public void write(
317                        DocumentElement theDocument,
318                        ExtendedHSearchIndexData theIndexData,
319                        PropertyBridgeWriteContext thePropertyBridgeWriteContext) {
320                if (theIndexData != null) {
321                        ourLog.trace("Writing index data for {}", theIndexData);
322                        theIndexData.writeIndexElements(theDocument);
323                }
324        }
325}