Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

name: Run search_api2 tests

on:
Expand All @@ -21,6 +18,9 @@ on:
jobs:
build:
runs-on: ubuntu-latest
env:
ELASTICSEARCH_AUTH_USERNAME: elastic
ELASTICSEARCH_AUTH_PASSWORD: changeme

steps:
- name: Check out GitHub repo
Expand Down
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Changed
- Upgraded Python to version 3.9.19 in test workflows and Dockerfile
- Updated integration tests README file
- Required tests to now use authentication against elasticsearch

### Fixed
- Container/service shutdown issues; all unit and integration tests now pass locally

### Security
- Vendored `kbase-jsonrpcbase` 0.3.0a6 and `jsonrpc11base` to resolve dependency conflicts
- Now requires credentials for connection to elasticsearch


## [1.0.0] - 2021-04-20
### Fixed
Expand Down
5 changes: 4 additions & 1 deletion docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ services:
- DEVELOPMENT=1
- PYTHONUNBUFFERED=true
- ELASTICSEARCH_URL=http://elasticsearch:9200
- ELASTICSEARCH_AUTH_USERNAME=elastic
- ELASTICSEARCH_AUTH_PASSWORD=changeme
- WORKERS=2

elasticsearch:
Expand All @@ -25,7 +27,8 @@ services:
- "ES_JAVA_OPTS=-Xms512m -Xmx512m"
- bootstrap.memory_lock=true
- discovery.type=single-node
- xpack.security.enabled=false
- xpack.security.enabled=true
- ELASTIC_PASSWORD=changeme
ports:
- "127.0.0.1:9200:9200"
- "127.0.0.1:9300:9300"
6 changes: 3 additions & 3 deletions src/es_client/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,12 @@
if params.get('track_total_hits'):
options['track_total_hits'] = params.get('track_total_hits')

headers = {'Content-Type': 'application/json'}

# Allows index exclusion; otherwise there is an error
params = {'allow_no_indices': 'true'}

resp = requests.post(url, data=json.dumps(options), params=params, headers=headers)
resp = requests.post(
url, data=json.dumps(options), params=params, headers=config['elasticsearch_headers']
) # nosec B113

if not resp.ok:
_handle_es_err(resp)
Expand All @@ -100,7 +100,7 @@

def _handle_es_err(resp):
"""Handle a non-2xx response from Elasticsearch."""
logger.error(f"Elasticsearch response error:\n{resp.text}")

Check failure

Code scanning / CodeQL

Clear-text logging of sensitive information High

This expression logs
sensitive data (password)
as clear text.
This expression logs
sensitive data (password)
as clear text.
try:
resp_json = resp.json()
except Exception:
Expand Down
2 changes: 1 addition & 1 deletion src/search2_rpc/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def show_indexes(params, meta):
prefix = config['index_prefix']
resp = requests.get(
config['elasticsearch_url'] + '/_cat/indices/' + prefix + '*?format=json',
headers={'Content-Type': 'application/json'},
headers=config['elasticsearch_headers'],
)
if not resp.ok:
raise ElasticsearchError(resp.text)
Expand Down
2 changes: 1 addition & 1 deletion src/server/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ def _get_status_code(result: dict) -> int:

# Wait for dependencies to start
logger.info('Checking connection to elasticsearch')
wait_for_service(config['elasticsearch_url'], 'Elasticsearch')
wait_for_service(config['elasticsearch_url'], 'Elasticsearch', config['elasticsearch_headers'])
# Start the server
app.run(
host='0.0.0.0', # nosec
Expand Down
34 changes: 29 additions & 5 deletions src/utils/config.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,33 @@
import yaml
import urllib.request
import os
import base64


def auth_header_encoder(username, password):
"""
Encodes username and password for a Basic Authentication header.
Raises RuntimeError if either username or password is not provided.
"""
if not (username and password):
raise RuntimeError(
"Elasticsearch authentication credentials are required. "
"Set ELASTICSEARCH_AUTH_USERNAME and ELASTICSEARCH_AUTH_PASSWORD environment variables."
)
credentials = f"{username}:{password}"
credentials_bytes = credentials.encode('utf-8')
base64_credentials = base64.b64encode(credentials_bytes).decode('utf-8')
return f"Basic {base64_credentials}"


def init_config():
"""
Initialize configuration data for the whole app
Initialize configuration data for the whole app.
"""
# TODO: it might be better to NOT default to testing configuration,
# but rather explicitly set the test environment.
# Reason? A failure to configure one of these in prod could lead to
# confusing failure conditions.
ws_url = os.environ.get('WORKSPACE_URL', 'https://ci.kbase.us/services/ws').strip('/')
es_url = os.environ.get('ELASTICSEARCH_URL', 'http://localhost:9200').strip('/')
es_auth_username = os.environ.get('ELASTICSEARCH_AUTH_USERNAME')
es_auth_password = os.environ.get('ELASTICSEARCH_AUTH_PASSWORD')
index_prefix = os.environ.get('INDEX_PREFIX', 'test')
prefix_delimiter = os.environ.get('INDEX_PREFIX_DELIMITER', '.')
suffix_delimiter = os.environ.get('INDEX_SUFFIX_DELIMITER', '_')
Expand All @@ -24,6 +39,13 @@ def init_config():
'USER_PROFILE_URL',
'https://ci.kbase.us/services/user_profile/rpc/'
)

auth_header_value = auth_header_encoder(es_auth_username, es_auth_password)
elasticsearch_headers = {
'Content-Type': 'application/json',
'Authorization': auth_header_value
}

# Load the global configuration release (non-environment specific, public config)
allowed_protocols = ('https://', 'http://', 'file://')
matches_protocol = (config_url.startswith(prot) for prot in allowed_protocols)
Expand All @@ -33,10 +55,12 @@ def init_config():
global_config = yaml.safe_load(res)
with open('VERSION') as fd:
app_version = fd.read().replace('\n', '')

return {
'dev': bool(os.environ.get('DEVELOPMENT')),
'global': global_config,
'elasticsearch_url': es_url,
'elasticsearch_headers': elasticsearch_headers,
'index_prefix': index_prefix,
'prefix_delimiter': prefix_delimiter,
'suffix_delimiter': suffix_delimiter,
Expand Down
4 changes: 2 additions & 2 deletions src/utils/wait_for_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,18 @@
WAIT_POLL_INTERVAL = 5


def wait_for_service(url, name, timeout=DEFAULT_TIMEOUT):
def wait_for_service(url, name, headers, timeout=DEFAULT_TIMEOUT):
start = time.time()
while True:
logger.info(f'Attempting to connect to {name} at {url}')

Check failure

Code scanning / CodeQL

Clear-text logging of sensitive information High

This expression logs
sensitive data (password)
as clear text.
This expression logs
sensitive data (password)
as clear text.
try:
requests.get(url, timeout=timeout).raise_for_status()
requests.get(url, timeout=timeout, headers=headers).raise_for_status()
logger.info(f'{name} is online!')
break
except Exception:
logger.info(f'Waiting for {name} at {url}')

Check failure

Code scanning / CodeQL

Clear-text logging of sensitive information High

This expression logs
sensitive data (password)
as clear text.
This expression logs
sensitive data (password)
as clear text.
total_elapsed = time.time() - start
if total_elapsed > timeout:
logger.error(f'Unable to connect to {name} at {url} after {total_elapsed} seconds')

Check failure

Code scanning / CodeQL

Clear-text logging of sensitive information High

This expression logs
sensitive data (password)
as clear text.
This expression logs
sensitive data (password)
as clear text.
exit(1)
time.sleep(WAIT_POLL_INTERVAL)
68 changes: 35 additions & 33 deletions tests/helpers/init_elasticsearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from src.utils.config import config


# TODO use a util for creating index names
narrative_index_name = ''.join([
config['index_prefix'],
Expand Down Expand Up @@ -50,36 +51,8 @@
]


def init_elasticsearch():
"""
Initialize the indexes and documents on elasticsearch before running tests.
"""
global _COMPLETED
if _COMPLETED:
return
for index_name in index_names:
create_index(index_name)
create_index(narrative_index_name)
for index_name in index_names:
for doc in test_docs:
create_doc(index_name, doc)
for doc in narrative_docs:
create_doc(narrative_index_name, doc)
# create default_search alias for all fields.
url = f"{_ES_URL}/_aliases"
alias_name = config['index_prefix'] + config['prefix_delimiter'] + "default_search"
body = {
"actions": [
{"add": {"indices": index_names, "alias": alias_name}}
]
}
resp = requests.post(url, data=json.dumps(body), headers={'Content-Type': 'application/json'})
if not resp.ok:
raise RuntimeError("Error creating aliases on ES:", resp.text)
_COMPLETED = True


def create_index(index_name):
"""Create an Elasticsearch index if it does not already exist."""
# Check if exists
resp = requests.head(_ES_URL + '/' + index_name)
if resp.status_code == 200:
Expand All @@ -91,22 +64,51 @@ def create_index(index_name):
'index': {'number_of_shards': 2, 'number_of_replicas': 1}
}
}),
headers={'Content-Type': 'application/json'},
headers=config['elasticsearch_headers'],
)
if not resp.ok and resp.json()['error']['type'] != 'index_already_exists_exception':
raise RuntimeError('Error creating index on ES:', resp.text)


def create_doc(index_name, data):
"""Create a document in the specified index."""
# Wait for doc to sync
url = '/'.join([ # type: ignore
url = '/'.join([
_ES_URL,
index_name,
'_doc',
data['name'],
'?refresh=wait_for'
])
headers = {'Content-Type': 'application/json'}
resp = requests.put(url, data=json.dumps(data), headers=headers)
resp = requests.put(url, data=json.dumps(data), headers=config['elasticsearch_headers'])
if not resp.ok:
raise RuntimeError(f"Error creating test doc:\n{resp.text}")


def init_elasticsearch():
"""
Initialize the indexes and documents on elasticsearch before running tests.
"""
global _COMPLETED
if _COMPLETED:
return
for index_name in index_names:
create_index(index_name)
create_index(narrative_index_name)
for index_name in index_names:
for doc in test_docs:
create_doc(index_name, doc)
for doc in narrative_docs:
create_doc(narrative_index_name, doc)
# create default_search alias for all fields.
url = f"{_ES_URL}/_aliases"
alias_name = config['index_prefix'] + config['prefix_delimiter'] + "default_search"
body = {
"actions": [
{"add": {"indices": index_names, "alias": alias_name}}
]
}
resp = requests.post(url, data=json.dumps(body), headers=config['elasticsearch_headers'])
if not resp.ok:
raise RuntimeError("Error creating aliases on ES:", resp.text)
_COMPLETED = True
2 changes: 1 addition & 1 deletion tests/helpers/integration_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def start_service(app_url):
stdout=container_out,
stderr=container_err,
cwd=cwd)
wait_for_service(app_url, "search2")
wait_for_service(app_url, "search2", {})
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How does this work without the headers?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because it already didn't send any headers when querying search2. Only elastic requires auth

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I'm dumb. I was thinking this function is exclusively for ES



def stop_service():
Expand Down
3 changes: 2 additions & 1 deletion tests/helpers/unit_setup.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import subprocess
from src.utils.wait_for_service import wait_for_service
from src.utils.config import config
from src.utils.logger import logger
import json
import os
Expand Down Expand Up @@ -29,7 +30,7 @@ def start_service(wait_for_url, wait_for_name):
container_out = open("container.out", "w")
container_err = open("container.err", "w")
container_process = subprocess.Popen(cmd, shell=True, stdout=container_out, stderr=container_err)
wait_for_service(wait_for_url, wait_for_name)
wait_for_service(wait_for_url, wait_for_name, config['elasticsearch_headers'])


def stop_service():
Expand Down
15 changes: 14 additions & 1 deletion tests/unit/utils/test_config.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from src.utils.config import init_config
from src.utils.config import init_config, auth_header_encoder
import os
import pytest

Expand All @@ -13,3 +13,16 @@ def test_init_config_invalid_config_url():
os.environ['GLOBAL_CONFIG_URL'] = original_url
else:
os.environ.pop('GLOBAL_CONFIG_URL')


@pytest.mark.parametrize("username,password", [
(None, None),
(None, 'password'),
('username', None),
('', ''),
('', 'password'),
('username', ''),
])
def test_auth_header_encoder_missing_credentials(username, password):
with pytest.raises(RuntimeError, match="Elasticsearch authentication credentials are required"):
auth_header_encoder(username, password)
2 changes: 1 addition & 1 deletion tests/unit/utils/test_wait_for_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def bad_url_with_timeout(name, url, timeout, caplog):
with caplog.at_level(logging.INFO, logger='search2'):
start = time.time()
with pytest.raises(SystemExit) as se:
wait_for_service(url, 'foo', timeout=timeout)
wait_for_service(url, 'foo', {}, timeout=timeout)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same question - not sure how this can work without headers if auth is always on

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Auth is always on for elastic, not for any other services.


# Ensure it is attempting to exit.
assert se.type == SystemExit
Expand Down
Loading