# Licensed under a 3-clause BSD style license - see LICENSE.rst
"""
This is a set of regression tests for vo.
"""

# STDLIB
import difflib
import gzip
import io
import pathlib
import re
import sys
from unittest import mock

# THIRD-PARTY
import numpy as np
import pytest
from numpy.testing import assert_array_equal

# LOCAL
from astropy.io.votable import tree
from astropy.io.votable.exceptions import W31, W39, VOTableSpecError, VOWarning
from astropy.io.votable.table import parse, parse_single_table, validate
from astropy.io.votable.xmlutil import validate_schema
from astropy.utils.data import get_pkg_data_filename, get_pkg_data_filenames

# Determine the kind of float formatting in this build of Python
if hasattr(sys, "float_repr_style"):
    legacy_float_repr = sys.float_repr_style == "legacy"
else:
    legacy_float_repr = sys.platform.startswith("win")


def assert_validate_schema(filename, version):
    """Assert that ``filename`` validates against the VOTable schema.

    The check is silently skipped on Windows and whenever
    ``validate_schema`` raises `OSError`.

    Parameters
    ----------
    filename : str
        Path of the XML file to validate.
    version : str
        VOTable version passed through to ``validate_schema``.
    """
    __tracebackhide__ = True
    if sys.platform.startswith("win"):
        return

    try:
        rc, stdout, stderr = validate_schema(filename, version)
    except OSError:
        # If xmllint is not installed, we want the test to pass anyway
        return
    assert rc == 0, "File did not validate against VOTable schema"


def test_parse_single_table():
    """The default table of ``regression.xml`` has five rows."""
    with np.errstate(over="ignore"):
        # https://github.com/astropy/astropy/issues/13341
        table = parse_single_table(get_pkg_data_filename("data/regression.xml"))
    assert isinstance(table, tree.TableElement)
    assert len(table.array) == 5


def test_parse_single_table2():
    """``table_number=1`` selects a one-row table with 29 columns."""
    with np.errstate(over="ignore"):
        # https://github.com/astropy/astropy/issues/13341
        table2 = parse_single_table(
            get_pkg_data_filename("data/regression.xml"), table_number=1
        )
    assert isinstance(table2, tree.TableElement)
    assert len(table2.array) == 1
    assert len(table2.array.dtype.names) == 29


def test_parse_single_table3():
    """An out-of-range ``table_number`` raises `IndexError`."""
    with pytest.raises(IndexError):
        parse_single_table(get_pkg_data_filename("data/regression.xml"), table_number=3)


def _test_regression(tmp_path, _python_based=False, binary_mode=1):
    """Round-trip ``regression.xml`` through several serializations.

    The file is parsed, written as TABLEDATA, rewritten in a binary format,
    read back, written as TABLEDATA again, and the result is compared line by
    line with the stored truth file for the selected version.  The same
    output is finally written to a ``.gz`` path and compared once more.

    Parameters
    ----------
    tmp_path : pathlib.Path
        Directory in which all intermediate files are written.
    _python_based : bool, optional
        Forwarded as ``_debug_python_based_parser`` to ``parse``/``to_xml``.
    binary_mode : int, optional
        ``1`` writes the ``"binary"`` format as version ``"1.1"``;
        ``2`` writes ``"binary2"`` as version ``"1.3"``.
    """
    # Read the VOTABLE
    votable = parse(
        get_pkg_data_filename("data/regression.xml"),
        _debug_python_based_parser=_python_based,
    )
    table = votable.get_first_table()

    # NOTE(review): in the copy of this file under review everything between
    # the first "<" and the last ">" of this section had been stripped.  The
    # column names/order below agree with the rest of this module (29
    # columns; indices 0, 5 and 14 are string_test, unsignedByte and
    # bitarray), but the format codes should be confirmed against upstream.
    dtypes = [
        (("string test", "string_test"), "|O8"),
        (("fixed string test", "string_test_2"), "<U10"),
        ("unicode_test", "|O8"),
        (("unicode test", "fixed_unicode_test"), "<U10"),
        (("string array test", "string_array_test"), "<U4"),
        ("unsignedByte", "|u1"),
        ("short", "<i2"),
        ("int", "<i4"),
        ("intNoNull", "<i4"),
        ("long", "<i8"),
        ("double", "<f8"),
        ("float", "<f4"),
        ("array", "|O8"),
        ("bit", "|b1"),
        ("bitarray", "|b1", (3, 2)),
        ("bitvararray", "|O8"),
        ("bitvararray2", "|O8"),
        ("floatComplex", "<c8"),
        ("doubleComplex", "<c16"),
        ("doubleComplexArray", "|O8"),
        ("doubleComplexArrayFixed", "<c16", (2,)),
        ("boolean", "|b1"),
        ("booleanArray", "|b1", (4,)),
        ("nulls", "<i4"),
        ("nulls_array", "<i4", (2, 2)),
        ("precision1", "<f8"),
        ("precision2", "<f8"),
        ("doublearray", "|O8"),
        ("bitarray2", "|b1", (16,)),
    ]
    if sys.byteorder == "big":
        # The expected codes above are little-endian; flip them on
        # big-endian hosts.
        new_dtypes = []
        for dtype in dtypes:
            dtype = list(dtype)
            dtype[1] = dtype[1].replace("<", ">")
            new_dtypes.append(tuple(dtype))
        dtypes = new_dtypes
    assert table.array.dtype == dtypes

    votable.to_xml(
        str(tmp_path / "regression.tabledata.xml"),
        _debug_python_based_parser=_python_based,
    )
    assert_validate_schema(str(tmp_path / "regression.tabledata.xml"), votable.version)

    if binary_mode == 1:
        votable.get_first_table().format = "binary"
        votable.version = "1.1"
    elif binary_mode == 2:
        votable.get_first_table()._config["version_1_3_or_later"] = True
        votable.get_first_table().format = "binary2"
        votable.version = "1.3"

    # Also try passing a file handle
    with open(str(tmp_path / "regression.binary.xml"), "wb") as fd:
        votable.to_xml(fd, _debug_python_based_parser=_python_based)
    assert_validate_schema(str(tmp_path / "regression.binary.xml"), votable.version)
    # Also try passing a file handle
    with open(str(tmp_path / "regression.binary.xml"), "rb") as fd:
        votable2 = parse(fd, _debug_python_based_parser=_python_based)
    votable2.get_first_table().format = "tabledata"
    votable2.to_xml(
        str(tmp_path / "regression.bin.tabledata.xml"),
        _astropy_version="testing",
        _debug_python_based_parser=_python_based,
    )
    assert_validate_schema(
        str(tmp_path / "regression.bin.tabledata.xml"), votable.version
    )

    with open(
        get_pkg_data_filename(
            f"data/regression.bin.tabledata.truth.{votable.version}.xml"
        ),
        encoding="utf-8",
    ) as fd:
        truth = fd.readlines()
    with open(str(tmp_path / "regression.bin.tabledata.xml"), encoding="utf-8") as fd:
        output = fd.readlines()

    # If the lines happen to be different, print a diff
    # This is convenient for debugging
    sys.stdout.writelines(
        difflib.unified_diff(truth, output, fromfile="truth", tofile="output")
    )

    assert truth == output

    # Test implicit gzip saving
    votable2.to_xml(
        str(tmp_path / "regression.bin.tabledata.xml.gz"),
        _astropy_version="testing",
        _debug_python_based_parser=_python_based,
    )
    with gzip.GzipFile(str(tmp_path / "regression.bin.tabledata.xml.gz"), "rb") as gzfd:
        output = gzfd.readlines()
    # The gzip stream yields bytes; compare with trailing whitespace removed
    # on both sides.
    output = [x.decode("utf-8").rstrip() for x in output]
    truth = [x.rstrip() for x in truth]

    assert truth == output


@pytest.mark.xfail("legacy_float_repr")
def test_regression(tmp_path):
    """Run the round-trip with the default parser and ``binary`` format."""
    # W39: Bit values can not be masked
    with pytest.warns(W39), np.errstate(over="ignore"):
        _test_regression(tmp_path, False)


@pytest.mark.xfail("legacy_float_repr")
def test_regression_python_based_parser(tmp_path):
    """Run the round-trip with ``_debug_python_based_parser`` enabled."""
    # W39: Bit values can not be masked
    with pytest.warns(W39), np.errstate(over="ignore"):
        _test_regression(tmp_path, True)


@pytest.mark.xfail("legacy_float_repr")
def test_regression_binary2(tmp_path):
    """Run the round-trip with the ``binary2`` format."""
    # W39: Bit values can not be masked
    with pytest.warns(W39), np.errstate(over="ignore"):
        _test_regression(tmp_path, False, 2)


class TestFixups:
    """Checks on the first table of ``regression.xml`` after parsing."""

    def setup_class(self):
        with np.errstate(over="ignore"):
            # https://github.com/astropy/astropy/issues/13341
            self.table = parse(
                get_pkg_data_filename("data/regression.xml")
            ).get_first_table()
        self.array = self.table.array
        self.mask = self.table.array.mask

    def test_implicit_id(self):
        # The same column must be reachable by both of its keys.
        assert_array_equal(self.array["string_test_2"], self.array["fixed string test"])


class TestReferences:
    """Checks on FIELDref/PARAMref resolution and the ``iter_*`` helpers."""

    def setup_class(self):
        with np.errstate(over="ignore"):
            # https://github.com/astropy/astropy/issues/13341
            self.votable = parse(get_pkg_data_filename("data/regression.xml"))
        self.table = self.votable.get_first_table()
        self.array = self.table.array
        self.mask = self.table.array.mask

    def test_fieldref(self):
        fieldref = self.table.groups[1].entries[0]
        assert isinstance(fieldref, tree.FieldRef)
        assert fieldref.get_ref().name == "boolean"
        assert fieldref.get_ref().datatype == "boolean"

    def test_paramref(self):
        paramref = self.table.groups[0].entries[0]
        assert isinstance(paramref, tree.ParamRef)
        assert paramref.get_ref().name == "INPUT"
        assert paramref.get_ref().datatype == "float"

    def test_iter_fields_and_params_on_a_group(self):
        assert len(list(self.table.groups[1].iter_fields_and_params())) == 2

    def test_iter_groups_on_a_group(self):
        assert len(list(self.table.groups[1].iter_groups())) == 1

    def test_iter_groups(self):
        # Because of the ref'd table, there are more logical groups
        # than actually exist in the file
        assert len(list(self.votable.iter_groups())) == 9

    def test_ref_table(self):
        tables = list(self.votable.iter_tables())
        for x, y in zip(tables[0].array.data[0], tables[1].array.data[0]):
            assert_array_equal(x, y)

    def test_iter_coosys(self):
        assert len(list(self.votable.iter_coosys())) == 1


@pytest.mark.parametrize(
    "columns, expected_missing",
    [
        # a single non-existent column
        pytest.param(["c1"], ["c1"], id="basic"),
        # multiple missing columns (checking that order is preserved)
        pytest.param(["c1", "c2", "c3"], ["c1", "c2", "c3"], id="check-ordering"),
        # mixing existing with missing columns
        pytest.param(["c1", "string_test", "c2"], ["c1", "c2"], id="list-only-missing"),
    ],
)
def test_select_missing_columns_error_message(columns, expected_missing):
    """Requesting unknown columns raises `ValueError` naming only those."""
    # see https://github.com/astropy/astropy/pull/15956
    filename = get_pkg_data_filename("data/regression.xml")
    with pytest.raises(
        ValueError,
        match=re.escape(f"Columns {expected_missing!r} were not found in fields list"),
    ):
        parse_single_table(filename, columns=columns)


def test_select_columns_by_index():
    """Columns can be selected by integer position."""
    columns = [0, 5, 14]
    table = parse(
        get_pkg_data_filename("data/regression.xml"), columns=columns
    ).get_first_table()
    array = table.array
    mask = table.array.mask
    assert array["string_test"][0] == "String & test"
    columns = ["string_test", "unsignedByte", "bitarray"]
    for c in columns:
        assert not np.all(mask[c])

    # deselected columns shouldn't be present in the output
    assert "unicode_test" not in array.dtype.fields
    assert "unicode_test" not in mask.dtype.fields


def test_select_columns_by_name():
    """Columns can be selected by name."""
    columns = ["string_test", "unsignedByte", "bitarray"]
    table = parse(
        get_pkg_data_filename("data/regression.xml"), columns=columns
    ).get_first_table()
    array = table.array
    mask = table.array.mask
    assert array["string_test"][0] == "String & test"
    for c in columns:
        assert not np.all(mask[c])

    # deselected columns shouldn't be present in the output
    assert "unicode_test" not in array.dtype.fields
    assert "unicode_test" not in mask.dtype.fields


@pytest.mark.parametrize(
    "column_ids, use_names_over_ids, expected_names",
    [
        # just the first column
        pytest.param(
            ["string_test"],
            False,
            ["string_test"],
            id="first-col-ids",
        ),
        pytest.param(
            ["string_test"],
            True,
            ["string test"],
            id="first-col-names",
        ),
        # a single column, other than the first
        pytest.param(
            ["unicode_test"],
            False,
            ["unicode_test"],
            id="single-col-ids",
        ),
        pytest.param(
            ["unicode_test"],
            True,
            ["unicode_test"],
            id="single-col-names",
        ),
        # two non-consecutive, differently named columns
        pytest.param(
            ["string_test", "unicode_test"],
            False,
            ["string_test", "unicode_test"],
            id="two-cols-ids",
        ),
        pytest.param(
            ["string_test", "unicode_test"],
            True,
            ["string test", "unicode_test"],
            id="two-cols-names",
        ),
        # just the first two columns (that have the same ID)
        pytest.param(
            ["string_test", "string_test_2"],
            False,
            ["string_test", "string_test_2"],
            id="two-cols-ids-sameID",
        ),
        pytest.param(
            ["string_test", "string_test_2"],
            True,
            ["string test", "fixed string test"],
            id="two-cols-names-sameID",
        ),
        # columns should be returned in the order they are found, which
        # in the general case isn't the order they are requested
        pytest.param(
            ["unicode_test", "string_test"],
            False,
            ["string_test", "unicode_test"],
            id="two-cols-ids-order-mismatch",
        ),
        pytest.param(
            ["unicode_test", "string_test"],
            True,
            ["string test", "unicode_test"],
            id="two-cols-names-order-mismatch",
        ),
    ],
)
def test_select_columns_by_name_edge_cases(
    column_ids, use_names_over_ids, expected_names
):
    """Selected columns convert to a table with the expected column names."""
    # see https://github.com/astropy/astropy/issues/14943
    filename = get_pkg_data_filename("data/regression.xml")
    with np.errstate(over="ignore"):
        # https://github.com/astropy/astropy/issues/13341
        vot1 = parse_single_table(filename, columns=column_ids)
    t1 = vot1.to_table(use_names_over_ids=use_names_over_ids)
    assert t1.colnames == expected_names


class TestParse:
    """Column-by-column checks of the first table of ``regression.xml``."""

    def setup_class(self):
        with np.errstate(over="ignore"):
            # https://github.com/astropy/astropy/issues/13341
            self.votable = parse(get_pkg_data_filename("data/regression.xml"))
        self.table = self.votable.get_first_table()
        self.array = self.table.array
        self.mask = self.table.array.mask

    def test_string_test(self):
        assert issubclass(self.array["string_test"].dtype.type, np.object_)
        assert_array_equal(
            self.array["string_test"],
            ["String & test", "String &amp; test", "XXXX", "", ""],
        )

    def test_fixed_string_test(self):
        assert issubclass(self.array["string_test_2"].dtype.type, np.str_)
        assert_array_equal(
            self.array["string_test_2"], ["Fixed stri", "0123456789", "XXXX", "", ""]
        )

    def test_unicode_test(self):
        assert issubclass(self.array["unicode_test"].dtype.type, np.object_)
        assert_array_equal(
            self.array["unicode_test"],
            ["Ceçi n'est pas un pipe", "வணக்கம்", "XXXX", "", ""],
        )

    def test_fixed_unicode_test(self):
        assert issubclass(self.array["fixed_unicode_test"].dtype.type, np.str_)
        assert_array_equal(
            self.array["fixed_unicode_test"],
            ["Ceçi n'est", "வணக்கம்", "0123456789", "", ""],
        )

    def test_unsignedByte(self):
        assert issubclass(self.array["unsignedByte"].dtype.type, np.uint8)
        assert_array_equal(self.array["unsignedByte"], [128, 255, 0, 255, 255])
        assert not np.any(self.mask["unsignedByte"])

    def test_short(self):
        assert issubclass(self.array["short"].dtype.type, np.int16)
        assert_array_equal(self.array["short"], [4096, 32767, -4096, 32767, 32767])
        assert not np.any(self.mask["short"])

    def test_int(self):
        assert issubclass(self.array["int"].dtype.type, np.int32)
        assert_array_equal(
            self.array["int"], [268435456, 2147483647, -268435456, 268435455, 123456789]
        )
        assert_array_equal(self.mask["int"], [False, False, False, False, True])

    def test_long(self):
        assert issubclass(self.array["long"].dtype.type, np.int64)
        assert_array_equal(
            self.array["long"],
            [
                922337203685477,
                123456789,
                -1152921504606846976,
                1152921504606846975,
                123456789,
            ],
        )
        assert_array_equal(self.mask["long"], [False, True, False, False, True])

    def test_double(self):
        assert issubclass(self.array["double"].dtype.type, np.float64)
        assert_array_equal(
            self.array["double"], [8.9990234375, 0.0, np.inf, np.nan, -np.inf]
        )
        assert_array_equal(self.mask["double"], [False, False, False, True, False])

    def test_float(self):
        assert issubclass(self.array["float"].dtype.type, np.float32)
        assert_array_equal(self.array["float"], [1.0, 0.0, np.inf, np.inf, np.nan])
        assert_array_equal(self.mask["float"], [False, False, False, False, True])

    def test_array(self):
        assert issubclass(self.array["array"].dtype.type, np.object_)
        match = [
            [],
            [[42, 32], [12, 32]],
            [[12, 34], [56, 78], [87, 65], [43, 21]],
            [[-1, 23]],
            [[31, -1]],
        ]
        for a, b in zip(self.array["array"], match):
            # assert issubclass(a.dtype.type, np.int64)
            # assert a.shape[1] == 2
            for a0, b0 in zip(a, b):
                assert issubclass(a0.dtype.type, np.int64)
                assert_array_equal(a0, b0)
        assert self.array.data["array"][3].mask[0][0]
        assert self.array.data["array"][4].mask[0][1]

    def test_bit(self):
        assert issubclass(self.array["bit"].dtype.type, np.bool_)
        assert_array_equal(self.array["bit"], [True, False, True, False, False])

    def test_bit_mask(self):
        assert_array_equal(self.mask["bit"], [False, False, False, False, True])

    def test_bitarray(self):
        assert issubclass(self.array["bitarray"].dtype.type, np.bool_)
        assert self.array["bitarray"].shape == (5, 3, 2)
        assert_array_equal(
            self.array["bitarray"],
            [
                [[True, False], [True, True], [False, True]],
                [[False, True], [False, False], [True, True]],
                [[True, True], [True, False], [False, False]],
                [[False, False], [False, False], [False, False]],
                [[False, False], [False, False], [False, False]],
            ],
        )

    def test_bitarray_mask(self):
        assert_array_equal(
            self.mask["bitarray"],
            [
                [[False, False], [False, False], [False, False]],
                [[False, False], [False, False], [False, False]],
                [[False, False], [False, False], [False, False]],
                [[True, True], [True, True], [True, True]],
                [[True, True], [True, True], [True, True]],
            ],
        )

    def test_bitvararray(self):
        assert issubclass(self.array["bitvararray"].dtype.type, np.object_)
        match = [
            [True, True, True],
            [False, False, False, False, False],
            [True, False, True, False, True],
            [],
            [],
        ]
        for a, b in zip(self.array["bitvararray"], match):
            assert_array_equal(a, b)
        match_mask = [
            [False, False, False],
            [False, False, False, False, False],
            [False, False, False, False, False],
            False,
            False,
        ]
        for a, b in zip(self.array["bitvararray"], match_mask):
            assert_array_equal(a.mask, b)

    def test_bitvararray2(self):
        assert issubclass(self.array["bitvararray2"].dtype.type, np.object_)
        match = [
            [],
            [
                [[False, True], [False, False], [True, False]],
                [[True, False], [True, False], [True, False]],
            ],
            [[[True, True], [True, True], [True, True]]],
            [],
            [],
        ]
        for a, b in zip(self.array["bitvararray2"], match):
            for a0, b0 in zip(a, b):
                assert a0.shape == (3, 2)
                assert issubclass(a0.dtype.type, np.bool_)
                assert_array_equal(a0, b0)

    def test_floatComplex(self):
        assert issubclass(self.array["floatComplex"].dtype.type, np.complex64)
        assert_array_equal(
            self.array["floatComplex"],
            [np.nan + 0j, 0 + 0j, 0 + -1j, np.nan + 0j, np.nan + 0j],
        )
        assert_array_equal(self.mask["floatComplex"], [True, False, False, True, True])

    def test_doubleComplex(self):
        assert issubclass(self.array["doubleComplex"].dtype.type, np.complex128)
        assert_array_equal(
            self.array["doubleComplex"],
            [np.nan + 0j, 0 + 0j, 0 + -1j, np.nan + (np.inf * 1j), np.nan + 0j],
        )
        assert_array_equal(self.mask["doubleComplex"], [True, False, False, True, True])

    def test_doubleComplexArray(self):
        assert issubclass(self.array["doubleComplexArray"].dtype.type, np.object_)
        assert [len(x) for x in self.array["doubleComplexArray"]] == [0, 2, 2, 0, 0]

    def test_boolean(self):
        assert issubclass(self.array["boolean"].dtype.type, np.bool_)
        assert_array_equal(self.array["boolean"], [True, False, True, False, False])

    def test_boolean_mask(self):
        assert_array_equal(self.mask["boolean"], [False, False, False, False, True])

    def test_boolean_array(self):
        assert issubclass(self.array["booleanArray"].dtype.type, np.bool_)
        assert_array_equal(
            self.array["booleanArray"],
            [
                [True, True, True, True],
                [True, True, False, True],
                [True, True, False, True],
                [False, False, False, False],
                [False, False, False, False],
            ],
        )

    def test_boolean_array_mask(self):
        assert_array_equal(
            self.mask["booleanArray"],
            [
                [False, False, False, False],
                [False, False, False, False],
                [False, False, True, False],
                [True, True, True, True],
                [True, True, True, True],
            ],
        )

    def test_nulls(self):
        assert_array_equal(self.array["nulls"], [0, -9, 2, -9, -9])
        assert_array_equal(self.mask["nulls"], [False, True, False, True, True])

    def test_nulls_array(self):
        assert_array_equal(
            self.array["nulls_array"],
            [
                [[-9, -9], [-9, -9]],
                [[0, 1], [2, 3]],
                [[-9, 0], [-9, 1]],
                [[0, -9], [1, -9]],
                [[-9, -9], [-9, -9]],
            ],
        )
        assert_array_equal(
            self.mask["nulls_array"],
            [
                [[True, True], [True, True]],
                [[False, False], [False, False]],
                [[True, False], [True, False]],
                [[False, True], [False, True]],
                [[True, True], [True, True]],
            ],
        )

    def test_double_array(self):
        assert issubclass(self.array["doublearray"].dtype.type, np.object_)
        assert len(self.array["doublearray"][0]) == 0
        assert_array_equal(
            self.array["doublearray"][1], [0, 1, np.inf, -np.inf, np.nan, 0, -1]
        )
        assert_array_equal(
            self.array.data["doublearray"][1].mask,
            [False, False, False, False, False, False, True],
        )

    def test_bit_array2(self):
        assert_array_equal(
            self.array["bitarray2"][0],
            [
                True,
                True,
                True,
                True,
                False,
                False,
                False,
                False,
                True,
                True,
                True,
                True,
                False,
                False,
                False,
                False,
            ],
        )

    def test_bit_array2_mask(self):
        assert not np.any(self.mask["bitarray2"][0])
        assert np.all(self.mask["bitarray2"][1:])

    def test_get_coosys_by_id(self):
        coosys = self.votable.get_coosys_by_id("J2000")
        assert coosys.system == "eq_FK5"

    def test_get_field_by_utype(self):
        fields = list(self.votable.get_fields_by_utype("myint"))
        assert fields[0].name == "int"
        assert fields[0].values.min == -1000

    def test_get_info_by_id(self):
        info = self.votable.get_info_by_id("QUERY_STATUS")
        assert info.value == "OK"

        if self.votable.version != "1.1":
            info = self.votable.get_info_by_id("ErrorInfo")
            assert info.value == "One might expect to find some INFO here, too..."

    def test_repr(self):
        # NOTE(review): the expected strings in this test are markup
        # ("<...>") and had been stripped from the copy under review; the
        # values below are restored from memory -- confirm against upstream.
        assert "3 tables" in repr(self.votable)
        assert (
            repr(list(self.votable.iter_fields_and_params())[0])
            == '<PARAM ID="awesome" arraysize="*" datatype="float" '
            'name="INPUT" unit="deg" value="[0.0 0.0]"/>'
        )
        # Smoke test
        repr(list(self.votable.iter_groups()))

        # Resource
        assert repr(self.votable.resources) == "[</>]"

        # Table
        assert repr(self.table).startswith("<VOTable")


# NOTE(review): whatever followed ``TestParse.test_repr`` up to the closing
# statements of the test below was missing from the copy under review and is
# NOT restored here.  Only the final two statements of this test survived;
# its name and the XML document are a minimal reconstruction that satisfies
# them -- confirm against upstream.
def test_get_infos_by_name():
    """``get_infos_by_name`` returns every INFO element with that name."""
    vot = parse(
        io.BytesIO(
            b"""
        <VOTABLE xmlns="http://www.ivoa.net/xml/VOTable/v1.3"
          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" version="1.4">
          <RESOURCE type="results">
            <INFO name="creator-name" value="Cannon, A."/>
            <INFO name="creator-name" value="Fleming, W."/>
          </RESOURCE>
        </VOTABLE>"""
        )
    )
    infos = vot.get_infos_by_name("creator-name")
    assert [i.value for i in infos] == ["Cannon, A.", "Fleming, W."]