Skip to content

Commit 805d391

Browse files
Jean-François Schaffsafwanrahman
andauthored
Configure Elasticsearch _id dynamically from document (#272)
* added Document method 'document_id' to customize documents' ids if needed * renamed new method to 'generate_id' and added integration tests * fixed compat with python 2 * Update tests/test_integration.py Co-authored-by: Safwan Rahman <safwan.rahman15@gmail.com> * Update tests/test_integration.py Co-authored-by: Safwan Rahman <safwan.rahman15@gmail.com> * generate_id is now a class method * improved integration tests for generate_id method of Document class * added commented broken unit tests for review / fix * Fixing pull requests tests and documentations Co-authored-by: Safwan Rahman <safwan.rahman15@gmail.com>
1 parent 4460bca commit 805d391

File tree

6 files changed

+207
-4
lines changed

6 files changed

+207
-4
lines changed

django_elasticsearch_dsl/documents.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,11 +156,20 @@ def parallel_bulk(self, actions, **kwargs):
156156
# the result is currently not used upstream anyway.
157157
return (1, [])
158158

159+
@classmethod
def generate_id(cls, object_instance):
    """
    Return the Elasticsearch document id (``_id``) for a Django object.

    By default the object's primary key (``pk``) is used. Override this
    class method in a subclass to derive the ``_id`` from something else.
    """
    primary_key = object_instance.pk
    return primary_key
167+
159168
def _prepare_action(self, object_instance, action):
160169
return {
161170
'_op_type': action,
162171
'_index': self._index._name,
163-
'_id': object_instance.pk,
172+
'_id': self.generate_id(object_instance),
164173
'_source': (
165174
self.prepare(object_instance) if action != 'delete' else None
166175
),

docs/source/fields.rst

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,3 +233,46 @@ Available Fields
233233
``properties`` is a dict where the key is a field name, and the value is a field
234234
instance.
235235

236+
237+
Document id
238+
===========
239+
240+
The Elasticsearch document id (``_id``) is not, strictly speaking, a field, as it is not
241+
part of the document itself. The default behavior of ``django_elasticsearch_dsl``
242+
is to use the primary key of the model as the document's id (``pk`` or ``id``).
243+
Nevertheless, it can sometimes be useful to change this default behavior. For this, one
244+
can redefine the ``generate_id(cls, instance)`` class method of the ``Document`` class.
245+
246+
For example, to use an article's slug as the elasticsearch ``_id`` instead of the
247+
article's integer id, one could use:
248+
249+
.. code-block:: python
250+
251+
# models.py
252+
253+
from django.db import models
254+
255+
class Article(models.Model):
256+
# ... #
257+
258+
slug = models.SlugField(
259+
max_length=255,
260+
unique=True,
261+
)
262+
263+
# ... #
264+
265+
266+
# documents.py
267+
268+
from .models import Article
269+
270+
class ArticleDocument(Document):
271+
class Django:
272+
model = Article
273+
274+
# ... #
275+
276+
@classmethod
277+
def generate_id(cls, article):
278+
return article.slug

tests/documents.py

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from django_elasticsearch_dsl import Document, Index, fields
33
from django_elasticsearch_dsl.registries import registry
44

5-
from .models import Ad, Category, Car, Manufacturer
5+
from .models import Ad, Category, Car, Manufacturer, Article
66

77
index_settings = {
88
'number_of_shards': 1,
@@ -148,5 +148,34 @@ class Index:
148148
settings = index_settings
149149

150150

151+
@registry.register_document
class ArticleDocument(Document):
    """Test document for ``Article`` that keeps the default ``generate_id``."""

    class Index:
        # Dedicated index for this document, using the shared test settings.
        name = 'test_articles'
        settings = index_settings

    class Django:
        # Only the slug is mapped from the model.
        model = Article
        fields = ['slug']
162+
163+
@registry.register_document
class ArticleWithSlugAsIdDocument(Document):
    """Test document for ``Article`` whose ``_id`` is the article's slug."""

    class Index:
        # Separate index from the one used by the default-id document.
        name = 'test_articles_with_slugs_as_doc_ids'
        settings = index_settings

    class Django:
        # Only the slug is mapped from the model.
        model = Article
        fields = ['slug']

    @classmethod
    def generate_id(cls, article):
        """Return the article's slug instead of its primary key."""
        slug = article.slug
        return slug
178+
179+
151180
ad_index = AdDocument._index
152181
car_index = CarDocument._index

tests/models.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,3 +87,17 @@ class Meta:
8787

8888
def __str__(self):
8989
return self.title
90+
91+
92+
@python_2_unicode_compatible
class Article(models.Model):
    """Minimal test model carrying a single unique slug."""

    slug = models.CharField(max_length=255, unique=True)

    class Meta:
        app_label = 'tests'

    def __str__(self):
        # The slug doubles as the text representation.
        return self.slug

tests/test_documents.py

Lines changed: 66 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import json
12
from unittest import TestCase
23

34
from django.db import models
@@ -6,12 +7,15 @@
67
from mock import patch, Mock, PropertyMock
78

89
from django_elasticsearch_dsl import fields
9-
from django_elasticsearch_dsl.documents import DocType
10+
from django_elasticsearch_dsl.documents import DocType, Document
1011
from django_elasticsearch_dsl.exceptions import (ModelFieldNotMappedError,
1112
RedeclaredFieldError)
1213
from django_elasticsearch_dsl.registries import registry
1314
from tests import ES_MAJOR_VERSION
1415

16+
from .models import Article
17+
from .documents import ArticleDocument, ArticleWithSlugAsIdDocument
18+
1519

1620
class Car(models.Model):
1721
name = models.CharField(max_length=255)
@@ -346,3 +350,64 @@ def test_init_prepare_results(self):
346350
self.assertEqual(sorted([tuple(x) for x in m.method_calls], key=lambda _: _[0]),
347351
[('name', (), {}), ('price', (), {}), ('type', (), {})]
348352
)
353+
354+
# Mock the Elasticsearch connection: the bulk call has to actually run so that
355+
# the action generator is iterated and generate_id gets called.
356+
# If we mocked the bulk in django_elasticsearch_dsl.documents instead,
357+
# the actual bulk would never be called and the test would fail.
358+
@patch('elasticsearch_dsl.connections.Elasticsearch.bulk')
def test_default_generate_id_is_called(self, _):
    """``update`` must call ``generate_id`` for the indexed object.

    The low-level Elasticsearch ``bulk`` is patched by the decorator, and
    ``generate_id`` is replaced by a mock for the duration of the update
    so the call can be observed.
    """
    article = Article(id=124594, slug='some-article')

    @registry.register_document
    class ArticleDocument(DocType):
        class Django:
            model = Article
            fields = ['slug']

        class Index:
            name = 'test_articles'
            settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    generate_id_patch = patch.object(
        ArticleDocument, 'generate_id', return_value=article.id
    )
    with generate_id_patch as patched_method:
        document = ArticleDocument()
        document.update(article)
        patched_method.assert_called()
384+
385+
@patch('elasticsearch_dsl.connections.Elasticsearch.bulk')
def test_custom_generate_id_is_called(self, mock_bulk):
    """An overridden ``generate_id`` must supply the ``_id`` sent to bulk.

    The low-level Elasticsearch ``bulk`` is patched by the decorator so the
    payload handed to it can be inspected through ``mock_bulk``.
    """
    article = Article(id=54218, slug='some-article-2')

    @registry.register_document
    class ArticleDocument(DocType):
        class Django:
            model = Article
            fields = ['slug']

        class Index:
            name = 'test_articles'

        @classmethod
        def generate_id(cls, article):
            return article.slug

    d = ArticleDocument()
    d.update(article)

    # Inspect what reached the low-level bulk API, because that is where the
    # action generator is consumed. The first newline-separated chunk of the
    # body is parsed as JSON and expected to hold the "index" action.
    bulk_body = mock_bulk.call_args[0][0]
    action_line = bulk_body.split("\n")[0]
    data = json.loads(action_line)

    # Use the TestCase assertion rather than a bare ``assert``: it is not
    # stripped under ``python -O`` and reports both values on failure.
    self.assertEqual(data["index"]["_id"], article.slug)

tests/test_integration.py

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from django.utils.translation import ugettext_lazy as _
88
from six import StringIO
99

10+
from elasticsearch.exceptions import NotFoundError
1011
from elasticsearch_dsl import Index as DSLIndex
1112
from django_elasticsearch_dsl.test import ESTestCase
1213
from tests import ES_MAJOR_VERSION
@@ -18,9 +19,11 @@
1819
CarDocument,
1920
CarWithPrepareDocument,
2021
ManufacturerDocument,
22+
ArticleDocument,
23+
ArticleWithSlugAsIdDocument,
2124
index_settings
2225
)
23-
from .models import Car, Manufacturer, Ad, Category, COUNTRIES
26+
from .models import Car, Manufacturer, Ad, Category, Article, COUNTRIES
2427

2528

2629
@unittest.skipUnless(
@@ -352,3 +355,43 @@ def test_queryset_iterator_queries(self):
352355
set([ad3.pk, self.ad1.pk, self.ad2.pk])
353356
)
354357

358+
def test_default_document_id(self):
    """A saved ``Article`` is retrievable by its Django id as the ES ``_id``."""
    obj_id = 12458
    article_slug = "some-article"
    article = Article(
        id=obj_id,
        slug=article_slug,
    )

    # saving should create two documents (in the two indices): one with the
    # Django object's id as the ES doc _id, and the other with the slug
    # as the ES _id
    article.save()

    # assert that the document's id is the id of the Django object
    try:
        es_obj = ArticleDocument.get(id=obj_id)
    except NotFoundError:
        # Format the message *before* passing it to fail(): fail() raises,
        # so anything chained onto its return value never runs and the
        # message would keep a literal "{}".
        self.fail("document with _id {} not found".format(obj_id))
    self.assertEqual(es_obj.slug, article.slug)
377+
378+
def test_custom_document_id(self):
    """A saved ``Article`` is retrievable by its slug from the custom-id document."""
    slug = "my-very-first-article"
    article = Article(slug=slug)

    # Saving is expected to index the article in both test indices: once
    # under the Django object's id and once under its slug.
    article.save()

    # The document in the custom-id index must be addressable by the slug.
    try:
        es_obj = ArticleWithSlugAsIdDocument.get(id=slug)
    except NotFoundError:
        self.fail(
            "document with _id '{}' not found: "
            "using a custom id is broken".format(slug)
        )
    else:
        self.assertEqual(es_obj.slug, article.slug)

0 commit comments

Comments
 (0)