Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion app/settings/dev.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
# e.g. when you are no longer using the import actions
WAGTAILREDIRECTS_AUTO_CREATE = False

# Cooment out if you need to use timezone aware datetimes
# Comment out if you need to use timezone aware datetimes
USE_TZ = False

# Shell plus config
Expand Down
3 changes: 2 additions & 1 deletion wordpress.docker/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,15 @@ services:
image: mysql:5.7
platform: linux/x86_64
restart: always
platform: linux/x86_64
env_file:
- .env
volumes:
- db:/var/lib/mysql
healthcheck:
test: [ "CMD", "mysqladmin", "ping", "-h", "localhost" ]
interval: 5s
timeout: 5s
timeout: 10s
retries: 5

volumes:
Expand Down
44 changes: 44 additions & 0 deletions wp_connector/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,41 @@ class can be used for all models.
# processing foreign keys here as we have access to all the data now
self.process_one_to_many(self.one_to_many)
self.process_many_to_many(self.many_to_many)
self.process_clean_fields(self.clean_fields)

@staticmethod
def get_cleaned_data(process_clean_fields, item):
    """Build the cleaned values for every field declared as needing cleaning.

    Args:
        process_clean_fields: Callable that returns a list of dicts. Each
            dict maps a target field name to a JMESPath expression that is
            evaluated against ``item``.
        item: The source record the JMESPath expressions are searched in.

    Returns:
        A list of single-key dicts ``{field_name: cleaned_html}``, in the
        order the fields were declared.
    """

    def clean_content(content):
        """Return ``content`` with top-level noise removed.

        Removes ``<br>`` tags and text-less ``<p>`` tags that sit at the top
        level of the markup, then trims leading/trailing whitespace
        (including newlines) from the serialised result.
        """
        # jmespath.search returns None when the expression matches nothing;
        # there is nothing to clean in that case (or for an empty string).
        # NOTE(review): assumes an empty string is an acceptable stored value
        # for a missing source field - confirm against the model definition.
        if not content:
            return ""

        soup = bs(content, "html.parser")

        for tag in soup.find_all("br", recursive=False):
            tag.decompose()

        # NOTE(review): a paragraph that only wraps media (e.g. a lone <img>)
        # has no text and is removed here too, as before - confirm intended.
        for tag in soup.find_all("p", recursive=False):
            if not tag.text.strip():
                tag.decompose()

        # Serialise the pruned tree exactly once. Joining str() of every
        # descendant tag would emit each nested tag twice (inside its parent
        # and again on its own) and would drop top-level text nodes.
        return str(soup).strip()

    return [
        {key: clean_content(jmespath.search(value, item))}
        for field in process_clean_fields()
        for key, value in field.items()
    ]

@staticmethod
def get_many_to_many_data(process_many_to_many_keys, item):
Expand Down Expand Up @@ -221,3 +256,12 @@ def process_many_to_many(self, objects):

for related_object in related_objects:
getattr(obj, field).add(related_object)

@staticmethod
def process_clean_fields(cleaned_fields):
sys.stdout.write("Processing clean fields...\n")
for obj in cleaned_fields:
for field in obj.cleaned_data:
for key, value in field.items():
setattr(obj, key, value)
obj.save()
2 changes: 1 addition & 1 deletion wp_connector/models/page.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def process_clean_fields():
"""Clean the content."""
return [
{
"content": "wp_cleaned_content",
"wp_cleaned_content": "content.rendered",
}
]

Expand Down
2 changes: 1 addition & 1 deletion wp_connector/models/post.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def process_clean_fields():
"""Clean the content."""
return [
{
"content": "wp_cleaned_content",
"wp_cleaned_content": "content.rendered",
}
]

Expand Down