Skip to content

Commit

Permalink
Fix OPTIMADE rester URL contruction and improve testing (#3756)
Browse files Browse the repository at this point in the history
* Handle trailing slashes in OptimadeRester aliases

* Allow tests to run without MP API key, and re-enabled old tests

* Add MCloud and multi-alias test

* add @ml-evs as CODEOWNER for pymatgen/ext and tests/ext directories

---------

Co-authored-by: Janosh Riebesell <[email protected]>
  • Loading branch information
ml-evs and janosh authored Apr 13, 2024
1 parent 585bb67 commit 37d1066
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 48 deletions.
2 changes: 2 additions & 0 deletions .github/CODEOWNERS
Validating CODEOWNERS rules …
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,5 @@
pymatgen/io/ase.py @Andrew-S-Rosen
pymatgen/io/abinit/* @gmatteo
pymatgen/io/lobster/* @JaGeo
pymatgen/ext/* @ml-evs
tests/ext/* @ml-evs
17 changes: 17 additions & 0 deletions pymatgen/ext/optimade.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,11 @@ def __init__(
# and values as the corresponding URL
self.resources = {}

# preprocess aliases to ensure they have a trailing slash where appropriate
for alias, url in self.aliases.items():
if urlparse(url).path is not None and not url.endswith("/"):
self.aliases[alias] += "/"

if not aliases_or_resource_urls:
aliases_or_resource_urls = list(self.aliases)
_logger.warning(
Expand Down Expand Up @@ -442,6 +447,10 @@ def _validate_provider(self, provider_url) -> Provider | None:
TODO: careful reading of OPTIMADE specification required
TODO: add better exception handling, intentionally permissive currently
"""
# Add trailing slash to all URLs if missing; prevents urljoin from scrubbing
# sections of the path
if urlparse(provider_url).path is not None and not provider_url.endswith("/"):
provider_url += "/"

def is_url(url) -> bool:
"""Basic URL validation thanks to https://stackoverflow.com/a/52455972."""
Expand Down Expand Up @@ -492,6 +501,9 @@ def _parse_provider(self, provider: str, provider_url: str) -> dict[str, Provide
A dictionary of keys (in format of "provider.database") to
Provider objects.
"""
# Add trailing slash to all URLs if missing; prevents urljoin from scrubbing
if urlparse(provider_url).path is not None and not provider_url.endswith("/"):
provider_url += "/"
try:
url = urljoin(provider_url, "v1/links")
provider_link_json = self._get_json(url)
Expand Down Expand Up @@ -552,6 +564,11 @@ def refresh_aliases(self, providers_url="https://providers.optimade.org/provider

self.aliases = {alias: provider.base_url for alias, provider in structure_providers.items()}

# Add missing trailing slashes to any aliases with a path that need them
for alias, url in self.aliases.items():
if urlparse(url).path is not None and not url.endswith("/"):
self.aliases[alias] += "/"

# TODO: revisit context manager logic here and in MPRester
def __enter__(self) -> Self:
"""Support for "with" context."""
Expand Down
108 changes: 60 additions & 48 deletions tests/ext/test_optimade.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,33 @@
import pytest
import requests

from pymatgen.core import SETTINGS
from pymatgen.ext.optimade import OptimadeRester
from pymatgen.util.testing import PymatgenTest

try:
# 403 is returned when server detects bot-like behavior
website_down = requests.get("https://materialsproject.org").status_code not in (200, 403)
website_down = requests.get(OptimadeRester.aliases["mp"]).status_code not in (200, 403)
except requests.exceptions.ConnectionError:
website_down = True

try:
optimade_providers_down = requests.get("https://providers.optimade.org").status_code not in (200, 403)
except requests.exceptions.ConnectionError:
optimade_providers_down = True

try:
mc3d_down = requests.get(OptimadeRester.aliases["mcloud.mc3d"] + "/v1/info").status_code not in (200, 403, 301)
except requests.exceptions.ConnectionError:
mc3d_down = True

try:
mc2d_down = requests.get(OptimadeRester.aliases["mcloud.mc2d"] + "/v1/info").status_code not in (200, 403, 301)
except requests.exceptions.ConnectionError:
mc2d_down = True


class TestOptimade(PymatgenTest):
@pytest.mark.skipif(
not SETTINGS.get("PMG_MAPI_KEY") or website_down, reason="PMG_MAPI_KEY env var not set or MP is down."
)
@pytest.mark.skipif(website_down, reason="MP OPTIMADE is down.")
def test_get_structures_mp(self):
with OptimadeRester("mp") as optimade:
structs = optimade.get_structures(elements=["Ga", "N"], nelements=2)
Expand All @@ -35,9 +47,7 @@ def test_get_structures_mp(self):
raw_filter_structs["mp"]
), f"Raw filter {_filter} did not return the same number of results as the query builder."

@pytest.mark.skipif(
not SETTINGS.get("PMG_MAPI_KEY") or website_down, reason="PMG_MAPI_KEY env var not set or MP is down."
)
@pytest.mark.skipif(website_down, reason="MP OPTIMADE is down.")
def test_get_snls_mp(self):
base_query = dict(elements=["Ga", "N"], nelements=2, nsites=[2, 6])
with OptimadeRester("mp") as optimade:
Expand All @@ -60,48 +70,50 @@ def test_get_snls_mp(self):
struct_nl_set = next(iter(extra_fields_set["mp"].values()))
assert field_set <= {*struct_nl_set.data["_optimade"]}

# Tests fail in CI for unknown reason, use for development only.
# def test_get_structures_mcloud_2dstructures(self):
# with OptimadeRester("mcloud.2dstructures") as optimade:
# structs = optimade.get_structures(elements=["B", "N"], nelements=2)
@pytest.mark.skipif(mc3d_down or mc2d_down, reason="At least one MC OPTIMADE API is down.")
def test_get_structures_mcloud(self):
with OptimadeRester(["mcloud.mc2d", "mcloud.mc3d"]) as optimade:
structs = optimade.get_structures(elements=["B", "N"], nelements=2)

# test_struct = next(iter(structs["mcloud.2dstructures"].values()))
test_struct = next(iter(structs["mcloud.mc2d"].values()))

# assert [str(el) for el in test_struct.types_of_species] == ["B", "N"]
assert [str(el) for el in test_struct.types_of_species] == ["B", "N"]

# def test_update_aliases(self):
#
# with OptimadeRester() as optimade:
# optimade.refresh_aliases()
#
# self.assertIn("mp", optimade.aliases)
test_struct = next(iter(structs["mcloud.mc3d"].values()))
assert [str(el) for el in test_struct.types_of_species] == ["B", "N"]

@pytest.mark.skipif(optimade_providers_down, reason="OPTIMADE providers list is down.")
def test_update_aliases(self):
with OptimadeRester() as optimade:
optimade.refresh_aliases()

assert "mp" in optimade.aliases

def test_build_filter(self):
with OptimadeRester("mp") as optimade:
assert optimade._build_filter(
elements=["Ga", "N"],
nelements=2,
nsites=(1, 100),
chemical_formula_anonymous="A2B",
chemical_formula_hill="GaN",
) == (
'(elements HAS ALL "Ga", "N")'
" AND (nsites>=1 AND nsites<=100)"
" AND (nelements=2)"
" AND (chemical_formula_anonymous='A2B')"
" AND (chemical_formula_hill='GaN')"
)

assert optimade._build_filter(
elements=["C", "H", "O"],
nelements=(3, 4),
nsites=(1, 100),
chemical_formula_anonymous="A4B3C",
chemical_formula_hill="C4H3O",
) == (
'(elements HAS ALL "C", "H", "O")'
" AND (nsites>=1 AND nsites<=100)"
" AND (nelements>=3 AND nelements<=4)"
" AND (chemical_formula_anonymous='A4B3C')"
" AND (chemical_formula_hill='C4H3O')"
)
assert OptimadeRester._build_filter(
elements=["Ga", "N"],
nelements=2,
nsites=(1, 100),
chemical_formula_anonymous="A2B",
chemical_formula_hill="GaN",
) == (
'(elements HAS ALL "Ga", "N")'
" AND (nsites>=1 AND nsites<=100)"
" AND (nelements=2)"
" AND (chemical_formula_anonymous='A2B')"
" AND (chemical_formula_hill='GaN')"
)

assert OptimadeRester._build_filter(
elements=["C", "H", "O"],
nelements=(3, 4),
nsites=(1, 100),
chemical_formula_anonymous="A4B3C",
chemical_formula_hill="C4H3O",
) == (
'(elements HAS ALL "C", "H", "O")'
" AND (nsites>=1 AND nsites<=100)"
" AND (nelements>=3 AND nelements<=4)"
" AND (chemical_formula_anonymous='A4B3C')"
" AND (chemical_formula_hill='C4H3O')"
)

0 comments on commit 37d1066

Please sign in to comment.