Module gplately.ptt.gpmdb
Expand source code
import argparse
import json
import math
import os
import sys
from pathlib import Path
import numpy as np
import pandas as pd
import pygplates
import requests
from plate_model_manager import PlateModelManager
# pygplates, pandas, numpy, requests and plate_model_manager are required to run this script
# using the gplately conda environment is recommended
# `micromamba activate gplately`
DATA_CACHE_DIR = "data-cache"
QUERY_DATA_URL = "https://www.gpmdb.net/get_query_data/"
QUERY_DATA_FILENAME = "query-data.json"
PMAG_RESULT_URL = "https://gpmdb.net/get_PMAGRESULT_data/?fmt=json"
PMAG_RESULT_FILENAME = "pmag-result.json"
def create_vgp_feature(
RESULTNO,
PLAT,
PLONG,
INC,
DEC,
DP,
DM,
LOMAGAGE,
HIMAGAGE,
LOWAGE,
HIGHAGE,
sample_site_position,
plate_id,
):
"""function to create VGP feature"""
# Paper reference or URL/DOI.
PAPER_URL = "https://doi.org/10.1016/j.earscirev.2022.104258"
# Create feature
vgp_feature = pygplates.Feature(pygplates.FeatureType.gpml_virtual_geomagnetic_pole)
vgp_feature.set_name(str(RESULTNO))
vgp_feature.set_description(PAPER_URL)
vgp_feature.set(
pygplates.PropertyName.gpml_average_sample_site_position,
pygplates.GmlPoint(sample_site_position),
)
vgp_feature.set(
pygplates.PropertyName.gpml_pole_position,
pygplates.GmlPoint(pygplates.PointOnSphere(PLAT, PLONG)),
)
vgp_feature.set(
pygplates.PropertyName.gpml_pole_a95, pygplates.XsDouble(math.sqrt(DP + DM))
)
vgp_feature.set(pygplates.PropertyName.gpml_pole_dp, pygplates.XsDouble(DP))
vgp_feature.set(pygplates.PropertyName.gpml_pole_dm, pygplates.XsDouble(DM))
vgp_feature.set(
pygplates.PropertyName.gpml_average_inclination, pygplates.XsDouble(INC)
)
vgp_feature.set(
pygplates.PropertyName.gpml_average_declination, pygplates.XsDouble(DEC)
)
vgp_feature.set(
pygplates.PropertyName.gpml_average_age,
pygplates.XsDouble((LOMAGAGE + HIMAGAGE) / 2.0),
)
vgp_feature.set_valid_time(HIGHAGE, LOWAGE)
vgp_feature.set_reconstruction_plate_id(plate_id)
return vgp_feature
def assign_plate_id(df, static_polygon_file, rotation_file):
"""assign plate ids for sites"""
pids = []
sites = []
plate_partitioner = pygplates.PlatePartitioner(
static_polygon_file,
rotation_file,
)
for index, row in df.iterrows():
# print(index)
sample_site_position = pygplates.PointOnSphere(row["SLAT"], row["SLONG"])
reconstructed_static_polygon = plate_partitioner.partition_point(
sample_site_position
)
if reconstructed_static_polygon:
plate_id = (
reconstructed_static_polygon.get_feature().get_reconstruction_plate_id()
)
else:
plate_id = 0
sites.append(sample_site_position)
pids.append(plate_id)
return sites, pids
class ArgParser(argparse.ArgumentParser):
def error(self, message):
sys.stderr.write(f"error: {message}\n")
self.print_help()
sys.exit(1)
def add_arguments(parser: argparse.ArgumentParser):
"""add command line argument parser"""
parser.formatter_class = argparse.RawDescriptionHelpFormatter
parser.description = __description__
parser.set_defaults(func=main)
parser.add_argument(
"-m", "--model", type=str, dest="model_name", default="Muller2022"
)
parser.add_argument("-o", "--outfile", type=str, dest="outfile")
__description__ = """Retrieve paleomagnetic data from https://www.gpmdb.net, create GPlates-compatible VGP features and save the VGP features in a .gpmlz file.
The two URLs being used are
- https://www.gpmdb.net/get_query_data/
- https://www.gpmdb.net/get_PMAGRESULT_data/?fmt=json
This command will create two files.
- the .gpmlz file -- contains the GPlates-compatible VGP features
- the pmag-result.csv -- contains the raw paleomagnetic data
Usage example: gplately gpmdb -m zahirovic2022 -o test.gpmlz
The default reconstruction model used to assign plate IDs is "Muller2022". Users can specify a different model with "-m/--model".
The available model names can be found with the command `pmm ls` (plate-model-manager https://pypi.org/project/plate-model-manager/; use the gplately conda env).
Users can specify the output .gpmlz file name with "-o/--outfile". By default, the output file name will be "vgp_features_{model name}.gpmlz".
For example, `gplately gpmdb` will use the model "Muller2022" and create the output file "vgp_features_Muller2022.gpmlz".
The file name for the raw paleomagnetic data is always "pmag-result.csv".
"""
def main(args):
Path(DATA_CACHE_DIR).mkdir(parents=True, exist_ok=True)
# get query data
if not os.path.isfile(f"{DATA_CACHE_DIR}/{QUERY_DATA_FILENAME}"):
response = requests.get(QUERY_DATA_URL, verify=False)
query_data = response.json()
with open(f"{DATA_CACHE_DIR}/{QUERY_DATA_FILENAME}", "w+") as outfile:
outfile.write(json.dumps(query_data))
else:
with open(f"{DATA_CACHE_DIR}/{QUERY_DATA_FILENAME}", "r") as infile:
query_data = json.load(infile)
columns = []
for c in query_data["columns"]:
if isinstance(c, list):
columns += c
elif isinstance(c, str):
columns.append(c)
else:
raise Exception(f"Invalid comlumn type: {type(c)}")
df_query = pd.DataFrame(np.array(query_data["data"]), columns=columns)
df_query = df_query.sort_values(by=["RESULTNO"], ignore_index=True)
# get pmag-result data
if not os.path.isfile(f"{DATA_CACHE_DIR}/{PMAG_RESULT_FILENAME}"):
response = requests.get(PMAG_RESULT_URL, verify=False)
pmagresult_data = response.json()
with open(f"{DATA_CACHE_DIR}/{PMAG_RESULT_FILENAME}", "w+") as outfile:
outfile.write(json.dumps(pmagresult_data))
else:
with open(f"{DATA_CACHE_DIR}/{PMAG_RESULT_FILENAME}", "r") as infile:
pmagresult_data = json.load(infile)
columns = []
for c in pmagresult_data["columns"]:
if isinstance(c, list):
columns += c
elif isinstance(c, str):
columns.append(c)
else:
raise Exception(f"Invalid comlumn type: {type(c)}")
df_pmagresult = pd.DataFrame(np.array(pmagresult_data["data"]), columns=columns)
df_pmagresult = df_pmagresult.sort_values(by=["RESULTNO"], ignore_index=True)
df_pmagresult["LOWAGE"] = df_query["LOWAGE"]
df_pmagresult["HIGHAGE"] = df_query["HIGHAGE"]
df = df_pmagresult[
[
"RESULTNO",
"SLAT",
"SLONG",
"PLAT",
"PLONG",
"INC",
"DEC",
"DP",
"DM",
"LOMAGAGE",
"HIMAGAGE",
"LOWAGE",
"HIGHAGE",
]
]
pm_manager = PlateModelManager()
model = pm_manager.get_model(args.model_name, data_dir=".")
sites, pids = assign_plate_id(
df,
static_polygon_file=model.get_static_polygons(),
rotation_file=model.get_rotation_model(),
)
count = 0
vgp_features = []
for index, row in df.iterrows():
# print(index)
if row["DP"] is not None and row["DM"] is not None and row["INC"] is not None:
vgp_feature = create_vgp_feature(
RESULTNO=row["RESULTNO"],
PLAT=row["PLAT"],
PLONG=row["PLONG"],
INC=row["INC"],
DEC=row["DEC"],
DP=row["DP"],
DM=row["DM"],
LOMAGAGE=row["LOMAGAGE"],
HIMAGAGE=row["HIMAGAGE"],
LOWAGE=row["LOWAGE"],
HIGHAGE=row["HIGHAGE"],
sample_site_position=sites[index],
plate_id=pids[index],
)
# Add VGP feature to list.
vgp_features.append(vgp_feature)
else:
count += 1
# print(f"ignore row: {row}")
print(f"{count} rows have been ignored due to None values.")
# Save VGP features to file.
if not args.outfile:
outfile_name = f"vgp_features_{args.model_name}.gpmlz"
else:
outfile_name = args.outfile
pygplates.FeatureCollection(vgp_features).write(outfile_name)
df_pmagresult = df_pmagresult.drop(["ROCKUNITNO"], axis=1)
df_pmagresult.to_csv("pmag-result.csv", index=False)
print(
f"The files {outfile_name} and pmag-result.csv have been created successfully."
)
if __name__ == "__main__":
# The command-line parser.
parser = argparse.ArgumentParser(
description=__description__,
formatter_class=argparse.RawDescriptionHelpFormatter,
)
# add arguments
add_arguments(parser)
# Parse command-line options.
args = parser.parse_args()
# call main function
main(args)
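The description above lists the two files a run produces. As a rough sketch of how those outputs might be inspected afterwards, assuming a run with the default "Muller2022" model has already completed in the current directory:

# A minimal sketch, assuming `gplately gpmdb` has already been run with the
# default model, so vgp_features_Muller2022.gpmlz and pmag-result.csv exist.
import pandas as pd
import pygplates

# Load the VGP features written by this module.
vgp_features = pygplates.FeatureCollection("vgp_features_Muller2022.gpmlz")
print(f"{len(vgp_features)} VGP features loaded")

# Each feature carries the plate ID assigned by assign_plate_id().
for feature in vgp_features:
    print(feature.get_name(), feature.get_reconstruction_plate_id())
    break  # show the first feature only

# Load the raw paleomagnetic table written alongside the features.
df = pd.read_csv("pmag-result.csv")
print(df[["RESULTNO", "SLAT", "SLONG", "PLAT", "PLONG"]].head())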
Functions
def add_arguments(parser: argparse.ArgumentParser)
-
add command line arguments to the parser
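A minimal sketch of wiring add_arguments into a standalone parser (the import path follows the module name above and assumes gplately is installed; only option parsing is shown, not the download workflow):

import argparse
from gplately.ptt import gpmdb

parser = argparse.ArgumentParser()
gpmdb.add_arguments(parser)

# Parse the options only; calling args.func(args) would invoke main() and
# download data from www.gpmdb.net.
args = parser.parse_args(["-m", "Muller2022", "-o", "test.gpmlz"])
print(args.model_name, args.outfile)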
def assign_plate_id(df, static_polygon_file, rotation_file)
-
assign plate IDs to sample sites
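A sketch of calling assign_plate_id directly on a small table of site coordinates. The coordinates below are made up for illustration, and the PlateModelManager call downloads the "Muller2022" model files on first use:

import pandas as pd
from plate_model_manager import PlateModelManager
from gplately.ptt.gpmdb import assign_plate_id

# Hypothetical sample sites (latitude, longitude in degrees).
df = pd.DataFrame({"SLAT": [-33.9, 48.2], "SLONG": [151.2, 16.4]})

# Static polygons and rotation model used for point-in-polygon partitioning.
model = PlateModelManager().get_model("Muller2022", data_dir=".")
sites, pids = assign_plate_id(
    df,
    static_polygon_file=model.get_static_polygons(),
    rotation_file=model.get_rotation_model(),
)
print(pids)  # one plate ID per row; 0 where no static polygon contains the site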
def create_vgp_feature(RESULTNO, PLAT, PLONG, INC, DEC, DP, DM, LOMAGAGE, HIMAGAGE, LOWAGE, HIGHAGE, sample_site_position, plate_id)
-
create a virtual geomagnetic pole (VGP) feature
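A sketch of building a single VGP feature with made-up values and writing it out on its own (none of the numbers below are real GPMDB records):

import pygplates
from gplately.ptt.gpmdb import create_vgp_feature

# Illustrative values only; real records come from the GPMDB tables.
feature = create_vgp_feature(
    RESULTNO=12345,
    PLAT=75.0,
    PLONG=120.0,
    INC=60.0,
    DEC=15.0,
    DP=3.0,
    DM=5.0,
    LOMAGAGE=100.0,
    HIMAGAGE=110.0,
    LOWAGE=95.0,
    HIGHAGE=115.0,
    sample_site_position=pygplates.PointOnSphere(-30.0, 140.0),
    plate_id=801,
)
pygplates.FeatureCollection([feature]).write("single_vgp.gpmlz")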
def main(args)
-
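main() only needs an object exposing model_name and outfile attributes, so it can also be driven without the CLI. A sketch using argparse.Namespace (this performs the same downloads and file writes as the command line would):

import argparse
from gplately.ptt.gpmdb import main

# Equivalent to `gplately gpmdb -m Muller2022 -o test.gpmlz`.
args = argparse.Namespace(model_name="Muller2022", outfile="test.gpmlz")
main(args)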
Classes
class ArgParser (prog=None, usage=None, description=None, epilog=None, parents=[], formatter_class=argparse.HelpFormatter, prefix_chars='-', fromfile_prefix_chars=None, argument_default=None, conflict_handler='error', add_help=True, allow_abbrev=True, exit_on_error=True)
-
Object for parsing command line strings into Python objects.
Keyword Arguments:
- prog – The name of the program (default: os.path.basename(sys.argv[0]))
- usage – A usage message (default: auto-generated from arguments)
- description – A description of what the program does
- epilog – Text following the argument descriptions
- parents – Parsers whose arguments should be copied into this one
- formatter_class – HelpFormatter class for printing help messages
- prefix_chars – Characters that prefix optional arguments
- fromfile_prefix_chars – Characters that prefix files containing additional arguments
- argument_default – The default value for all arguments
- conflict_handler – String indicating how to handle conflicts
- add_help – Add a -h/--help option
- allow_abbrev – Allow long options to be abbreviated unambiguously
- exit_on_error – Determines whether or not ArgumentParser exits with error info when an error occurs
Ancestors
- argparse.ArgumentParser
- argparse._AttributeHolder
- argparse._ActionsContainer
Methods
def error(self, message)
-
error(message: string)
Prints a usage message incorporating the message to stderr and exits.
If you override this in a subclass, it should not return – it should either exit or raise an exception.
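A sketch of the behaviour this override produces: on an unrecognised option the parser writes the error to stderr, prints the full help text, and exits with status 1. Catching SystemExit below only keeps the example from terminating the interpreter:

from gplately.ptt.gpmdb import ArgParser

parser = ArgParser()
parser.add_argument("-m", "--model", dest="model_name")

try:
    parser.parse_args(["--no-such-option"])
except SystemExit as exc:
    print(f"parser exited with status {exc.code}")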