Skip to content

Commit 5deac52

Browse files
committed
chore: add doi and publisher name to search
1 parent 5fc3132 commit 5deac52

File tree

7 files changed

+119
-1
lines changed

7 files changed

+119
-1
lines changed

components/renku_data_services/search/db.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,8 @@ def _dataconnector_to_entity_doc(dc: DataConnector | GlobalDataConnector) -> Dat
8080
description=dc.description,
8181
keywords=dc.keywords if dc.keywords is not None else [],
8282
version=DocVersions.off(),
83+
doi=dc.doi if hasattr(dc, "doi") else None,
84+
publisherName=dc.publisher_name if hasattr(dc, "publisher_name") else None,
8385
)
8486

8587

components/renku_data_services/search/solr_user_query.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,15 @@
1818
Created,
1919
CreatedByIs,
2020
DirectMemberIs,
21+
DoiIs,
2122
IdIs,
2223
InheritedMemberIs,
2324
KeywordIs,
2425
NameIs,
2526
NamespaceIs,
2627
Order,
2728
OrderBy,
29+
PublisherNameIs,
2830
RoleIs,
2931
SlugIs,
3032
SortableField,
@@ -344,6 +346,14 @@ async def visit_slug_is(self, ft: SlugIs) -> None:
344346
"""Process slug-is segment."""
345347
self.__append(st.field_is_any(Fields.slug, ft.values.map(st.from_str)))
346348

349+
async def visit_doi_is(self, ft: DoiIs) -> None:
    """Process doi-is segment.

    Each value of the segment is converted with `st.from_str`; the token
    produced by `st.field_is_any` for `Fields.doi` is then appended.
    """
    doi_tokens = ft.values.map(st.from_str)
    self.__append(st.field_is_any(Fields.doi, doi_tokens))
352+
353+
async def visit_publisher_name_is(self, ft: PublisherNameIs) -> None:
    """Process publisher-name-is segment.

    Each value of the segment is converted with `st.from_str`; the token
    produced by `st.field_is_any` for `Fields.publisher_name` is then appended.
    """
    publisher_tokens = ft.values.map(st.from_str)
    self.__append(st.field_is_any(Fields.publisher_name, publisher_tokens))
356+
347357
async def visit_visibility_is(self, ft: VisibilityIs) -> None:
348358
"""Process visibility-is segment."""
349359
self.__append(st.field_is_any(Fields.visibility, ft.values.map(st.from_visibility)))

components/renku_data_services/search/user_query.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ class Field(StrEnum):
5151
namespace = "namespace"
5252
direct_member = "direct_member"
5353
inherited_member = "inherited_member"
54+
doi = "doi"
55+
publisher_name = "publisher_name"
5456

5557

5658
class Comparison(StrEnum):
@@ -631,6 +633,54 @@ async def accept(self, visitor: SegmentVisitior) -> None:
631633
return await visitor.visit_role_is(self)
632634

633635

636+
@dataclass
class DoiIs(FieldComparison):
    """Field segment comparing the doi against a `Nel` of string values."""

    values: Nel[str]

    @property
    def cmp(self) -> Comparison:
        """The comparison operation (always `Comparison.is_equal`)."""
        return Comparison.is_equal

    @property
    def field(self) -> Field:
        """The field name (always `Field.doi`)."""
        return Field.doi

    async def accept(self, visitor: SegmentVisitior) -> None:
        """Dispatch this segment to `visitor.visit_doi_is`."""
        return await visitor.visit_doi_is(self)

    def _render_value(self) -> str:
        # Delegates to `Nel.mk_string` with "," and `Helper.quote`.
        separator = ","
        return self.values.mk_string(separator, Helper.quote)
658+
659+
660+
@dataclass
class PublisherNameIs(FieldComparison):
    """Field segment comparing the publisher name against a `Nel` of string values."""

    values: Nel[str]

    @property
    def cmp(self) -> Comparison:
        """The comparison operation (always `Comparison.is_equal`)."""
        return Comparison.is_equal

    @property
    def field(self) -> Field:
        """The field name (always `Field.publisher_name`)."""
        return Field.publisher_name

    async def accept(self, visitor: SegmentVisitior) -> None:
        """Dispatch this segment to `visitor.visit_publisher_name_is`."""
        return await visitor.visit_publisher_name_is(self)

    def _render_value(self) -> str:
        # Delegates to `Nel.mk_string` with "," and `Helper.quote`.
        separator = ","
        return self.values.mk_string(separator, Helper.quote)
682+
683+
634684
@dataclass
635685
class Text(SegmentBase):
636686
"""A query part that is not corresponding to a specific field."""
@@ -712,6 +762,8 @@ async def accept(self, visitor: SegmentVisitior) -> None:
712762
| RoleIs
713763
| InheritedMemberIs
714764
| DirectMemberIs
765+
| DoiIs
766+
| PublisherNameIs
715767
)
716768

717769

@@ -812,6 +864,16 @@ def role_is(cls, role: Role, *args: Role) -> Segment:
812864
"""Return role-is query segment."""
813865
return RoleIs(Nel(role, list(args)))
814866

867+
@classmethod
def doi_is(cls, doi: str, *args: str) -> Segment:
    """Return doi-is query segment.

    `doi` is the first value; any further positional values are added after it.
    """
    further_dois = list(args)
    return DoiIs(Nel(doi, further_dois))
871+
872+
@classmethod
def publisher_name_is(cls, publisher_name: str, *args: str) -> Segment:
    """Return publisher-name-is query segment.

    `publisher_name` is the first value; any further positional values are
    added after it.
    """
    further_names = list(args)
    return PublisherNameIs(Nel(publisher_name, further_names))
876+
815877

816878
@dataclass
817879
class UserQuery:
@@ -920,6 +982,16 @@ async def visit_inherited_member_is(self, ft: InheritedMemberIs) -> None:
920982
"""Visit inherited-member-is."""
921983
...
922984

985+
@abstractmethod
986+
async def visit_doi_is(self, ft: DoiIs) -> None:
987+
"""Visit doi-is; `ft` is the `DoiIs` segment being visited."""
988+
...
989+
990+
@abstractmethod
991+
async def visit_publisher_name_is(self, ft: PublisherNameIs) -> None:
992+
"""Visit publisher-name-is; `ft` is the `PublisherNameIs` segment being visited."""
993+
...
994+
923995

924996
class UserQueryVisitor[T](SegmentVisitior):
925997
"""A visitor to transform user queries."""
@@ -1001,6 +1073,16 @@ async def visit_visibility_is(self, ft: VisibilityIs) -> None:
10011073
"""Forwards to `visit_field_term`."""
10021074
return await self.visit_field_term(ft)
10031075

1076+
@override
1077+
async def visit_doi_is(self, ft: DoiIs) -> None:
1078+
"""Forwards the doi-is segment to `visit_field_term`."""
1079+
return await self.visit_field_term(ft)
1080+
1081+
@override
1082+
async def visit_publisher_name_is(self, ft: PublisherNameIs) -> None:
1083+
"""Forwards the publisher-name-is segment to `visit_field_term`."""
1084+
return await self.visit_field_term(ft)
1085+
10041086

10051087
class EmptyUserQueryVisitor[T](UserQueryFieldTermVisitor[T]):
10061088
"""A visitor with every method doing nothing.

components/renku_data_services/search/user_query_parser.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
CreatedByIs,
2929
DateTimeCalc,
3030
DirectMemberIs,
31+
DoiIs,
3132
Field,
3233
Helper,
3334
IdIs,
@@ -40,6 +41,7 @@
4041
PartialDate,
4142
PartialDateTime,
4243
PartialTime,
44+
PublisherNameIs,
4345
RelativeDate,
4446
RoleIs,
4547
SlugIs,
@@ -115,6 +117,10 @@ def _make_field_term(args: tuple[str, Nel[str]]) -> Parser:
115117
return success(NamespaceIs(values))
116118
case Field.created_by:
117119
return success(CreatedByIs(values))
120+
case Field.doi:
121+
return success(DoiIs(values))
122+
case Field.publisher_name:
123+
return success(PublisherNameIs(values))
118124
case _:
119125
return fail(f"Invalid field name: {field}")
120126

components/renku_data_services/solr/entity_documents.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,8 @@ class DataConnector(EntityDoc, frozen=True):
237237
isNamespace: Annotated[Literal[False], BeforeValidator(lambda e: False)] = False
238238
namespaceDetails: ResponseBody | None = None
239239
creatorDetails: ResponseBody | None = None
240+
publisherName: str | None = None
241+
doi: str | None = None
240242

241243
@property
242244
def entity_type(self) -> EntityType:
@@ -287,7 +289,7 @@ def _add_tzinfo(cls, v: datetime) -> datetime:
287289

288290
@classmethod
def from_dict(cls, d: dict[str, Any]) -> DataConnector:
    """Create a data connector from a dictionary.

    The dictionary is passed to `DataConnector.model_validate`, whose result
    is returned.
    """
    validated = DataConnector.model_validate(d)
    return validated
292294

293295

components/renku_data_services/solr/entity_schema.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,10 @@ class Fields:
5050
creator_details: Final[FieldName] = FieldName("creatorDetails")
5151
namespace_details: Final[FieldName] = FieldName("namespaceDetails")
5252

53+
# data connector fields
54+
doi: Final[FieldName] = FieldName("doi")
55+
publisher_name: Final[FieldName] = FieldName("publisherName")
56+
5357

5458
class Analyzers:
5559
"""A collection of analyzers."""
@@ -155,4 +159,14 @@ class FieldTypes:
155159
],
156160
requires_reindex=True,
157161
),
162+
SchemaMigration(
163+
version=13,
164+
commands=[
165+
AddCommand(Field.of(Fields.doi, FieldTypes.string)),
166+
AddCommand(CopyFieldRule(source=Fields.doi, dest=Fields.content_all)),
167+
AddCommand(Field.of(Fields.publisher_name, FieldTypes.string)),
168+
AddCommand(CopyFieldRule(source=Fields.publisher_name, dest=Fields.content_all)),
169+
],
170+
requires_reindex=False,
171+
),
158172
]

components/renku_data_services/solr/solr_schema.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,9 @@ class FieldTypeClasses:
110110
type_float = FieldTypeClass("FloatPointField")
111111
type_double = FieldTypeClass("DoublePointField")
112112
type_text = FieldTypeClass("TextField")
113+
"""TextField gets tokenized in Solr by default in our deployment."""
113114
type_str = FieldTypeClass("StrField")
115+
"""StrField does not get tokenized in Solr by default in our deployment."""
114116
type_uuid = FieldTypeClass("UUIDField")
115117
type_rank = FieldTypeClass("RankField")
116118
type_date_point = FieldTypeClass("DatePointField")

0 commit comments

Comments
 (0)