1111 FieldName ,
1212 FieldType ,
1313 Filters ,
14+ ReplaceCommand ,
1415 SchemaCommand ,
1516 Tokenizers ,
1617 TypeName ,
@@ -79,6 +80,11 @@ class Analyzers:
7980 ],
8081 )
8182
83+ keyword_case_insensitive : Final [Analyzer ] = Analyzer (
84+ tokenizer = Tokenizers .keyword ,
85+ filters = [Filters .lowercase ],
86+ )
87+
8288
8389class FieldTypes :
8490 """A collection of field types."""
@@ -99,6 +105,12 @@ class FieldTypes:
99105 )
100106 date_time : Final [FieldType ] = FieldType .date_time_point (TypeName ("SearchDateTime" ))
101107
108+ keyword : Final [FieldType ] = (
109+ FieldType .text (TypeName ("Keyword" )).make_stored ().with_analyzer (Analyzers .keyword_case_insensitive )
110+ )
111+ """keyword is a field type whose value is not changed at all by the tokenizer and is stored unchanged,
112+ but is searched in a case-insensitive manner. Note: analyzers cannot be added to StrField, so we use TextField."""
113+
102114
103115initial_entity_schema : Final [list [SchemaCommand ]] = [
104116 AddCommand (FieldTypes .id ),
@@ -162,11 +174,19 @@ class FieldTypes:
162174 SchemaMigration (
163175 version = 13 ,
164176 commands = [
165- AddCommand (Field .of (Fields .doi , FieldTypes .string )),
177+ AddCommand (FieldTypes .keyword ),
178+ AddCommand (Field .of (Fields .doi , FieldTypes .keyword )),
166179 AddCommand (CopyFieldRule (source = Fields .doi , dest = Fields .content_all )),
167- AddCommand (Field .of (Fields .publisher_name , FieldTypes .string )),
180+ AddCommand (Field .of (Fields .publisher_name , FieldTypes .keyword )),
168181 AddCommand (CopyFieldRule (source = Fields .publisher_name , dest = Fields .content_all )),
169182 ],
170183 requires_reindex = False ,
171184 ),
185+ SchemaMigration (
186+ version = 14 ,
187+ commands = [
188+ ReplaceCommand (Field .of (Fields .keywords , FieldTypes .keyword ).make_multi_valued ()),
189+ ],
190+ requires_reindex = True ,
191+ ),
172192]
0 commit comments