Module gplately.feature_filter

Expand source code
import abc
import argparse
from typing import List

import pygplates


class FeatureFilter(metaclass=abc.ABCMeta):
    @classmethod
    def __subclasshook__(cls, subclass):
        return (
            hasattr(subclass, "should_keep")
            and callable(subclass.should_keep)
            or NotImplemented
        )

    @abc.abstractmethod
    def should_keep(self, feature: pygplates.Feature) -> bool:
        """This abstract method must be implemented in subclass.

        :param feature: pygplates.Feature

        :returns: true if the feature should be kept; false otherwise
        """

        raise NotImplementedError


class FeatureNameFilter(FeatureFilter):
    """filter features by name

    for example:
        FeatureNameFilter(['Africa', 'Asia']) -- keep features who name contains 'Africa' or 'Asia'
        FeatureNameFilter(['Africa', 'Asia'], exclude=True) -- keep features who name does not contain 'Africa' or 'Asia'
        FeatureNameFilter(['Africa', 'Asia'], exact_match=True) -- keep features who name is 'Africa' or 'Asia'
        FeatureNameFilter(['Africa', 'Asia'], exclude=True, exact_match=True) -- keep features who name is not 'Africa' or 'Asia'
        FeatureNameFilter(['Africa', 'Asia'], exclude=True, exact_match=True, case_sensitive=True) -- keep features who name is not 'Africa' or 'Asia' (case sensitive)
    """

    def __init__(
        self, names: List[str], exact_match=False, case_sensitive=False, exclude=False
    ):
        self.names = names
        self.exact_match = exact_match
        self.case_sensitive = case_sensitive
        self.exclude = exclude

    def check_name(self, name_1: str, name_2: str) -> bool:
        """check if two names are the same or name_2 contains name_1"""
        if not self.case_sensitive:
            name_1_tmp = name_1.lower()
            name_2_tmp = name_2.lower()
        else:
            name_1_tmp = name_1
            name_2_tmp = name_2
        if self.exact_match:
            return name_1_tmp == name_2_tmp
        else:
            return name_1_tmp in name_2_tmp

    def should_keep(self, feature: pygplates.Feature) -> bool:
        if self.exclude:
            for name in self.names:
                if self.check_name(name, feature.get_name()):
                    return False
            return True
        else:
            for name in self.names:
                if self.check_name(name, feature.get_name()):
                    return True
            return False


class PlateIDFilter(FeatureFilter):
    """filter features by plate ID

    for example:
        PlateIDFilter([101,201,301]) -- keep features whose plate id is 101 or 201 or 301
        PlateIDFilter([101,201,301], exclude=True) -- keep features whose plate id is not 101 nor 201 nor 301

    """

    def __init__(self, pids: List[int], exclude=False):
        self.pids = pids
        self.exclude = exclude

    def should_keep(self, feature: pygplates.Feature) -> bool:
        if not self.exclude and feature.get_reconstruction_plate_id() in self.pids:
            return True
        if self.exclude and feature.get_reconstruction_plate_id() not in self.pids:
            return True
        return False


class BirthAgeFilter(FeatureFilter):
    """filter features by the time of appearance

    for example:
        BirthAgeFilter(500) -- keep features whose time of apprearance are bigger than 500
        BirthAgeFilter(500, keep_older=False) --  keep features whose time of apprearance are smaller than 500

    :param age: the age criterion
    :param keep_older: if True, return True when the feature's birth age is older than the age criterion. If False, otherwise.

    """

    def __init__(self, age: float, keep_older=True):
        self.age = age
        self.keep_older = keep_older

    def should_keep(self, feature: pygplates.Feature) -> bool:
        valid_time = feature.get_valid_time(None)
        if valid_time:
            if self.keep_older and valid_time[0] > self.age:
                return True
            if not self.keep_older and valid_time[0] < self.age:
                return True
        return False


def filter_feature_collection(
    feature_collection: pygplates.FeatureCollection, filters: List[FeatureFilter]
):
    """Filter feature collection by various criteria.

    See scripts/test_feature_filter.sh for usage examples.
    """
    new_feature_collection = pygplates.FeatureCollection()
    for feature in feature_collection:
        keep_flag = True
        for filter in filters:
            if not filter.should_keep(feature):
                keep_flag = False
                break
        if keep_flag:
            new_feature_collection.add(feature)
    return new_feature_collection


def add_parser(subparser):
    """add feature filter command line argument parser"""
    filter_cmd = subparser.add_parser(
        "filter",
        help="Filter feature collection by various criteria.",
        description=filter_feature_collection.__doc__,
    )
    filter_cmd.formatter_class = argparse.RawDescriptionHelpFormatter

    # feature filter command arguments
    filter_cmd.set_defaults(func=run_filter_feature_collection)
    filter_cmd.add_argument("filter_input_file", type=str)
    filter_cmd.add_argument("filter_output_file", type=str)

    name_group = filter_cmd.add_mutually_exclusive_group()
    name_group.add_argument("-n", "--names", type=str, dest="names", nargs="+")
    name_group.add_argument(
        "--exclude-names", type=str, dest="exclude_names", nargs="+"
    )

    pid_group = filter_cmd.add_mutually_exclusive_group()
    pid_group.add_argument("-p", "--pids", type=int, dest="pids", nargs="+")
    pid_group.add_argument("--exclude-pids", type=int, dest="exclude_pids", nargs="+")

    birth_age_group = filter_cmd.add_mutually_exclusive_group()
    birth_age_group.add_argument(
        "-a", "--min-birth-age", type=float, dest="min_birth_age"
    )
    birth_age_group.add_argument("--max-birth-age", type=float, dest="max_birth_age")

    filter_cmd.add_argument(
        "--case-sensitive", dest="case_sensitive", action="store_true"
    )
    filter_cmd.add_argument("--exact-match", dest="exact_match", action="store_true")


def run_filter_feature_collection(args):
    """Filter the input feature collection according to command line arguments."""
    input_feature_collection = pygplates.FeatureCollection(args.filter_input_file)

    filters = []
    if args.names:
        filters.append(
            FeatureNameFilter(
                args.names,
                exact_match=args.exact_match,
                case_sensitive=args.case_sensitive,
            )
        )
    elif args.exclude_names:
        filters.append(
            FeatureNameFilter(
                args.exclude_names,
                exclude=True,
                exact_match=args.exact_match,
                case_sensitive=args.case_sensitive,
            )
        )

    if args.pids:
        filters.append(PlateIDFilter(args.pids))
    elif args.exclude_pids:
        filters.append(PlateIDFilter(args.exclude_pids, exclude=True))

    # print(args.max_birth_age)
    if args.max_birth_age is not None:
        filters.append(BirthAgeFilter(args.max_birth_age, keep_older=False))
    elif args.min_birth_age is not None:
        filters.append(BirthAgeFilter(args.min_birth_age))

    new_fc = filter_feature_collection(
        input_feature_collection,
        filters,
    )

    new_fc.write(args.filter_output_file)
    print(
        f"Done! The filtered feature collection has been saved to {args.filter_output_file}."
    )

Functions

def add_parser(subparser)

add feature filter command line argument parser

Expand source code
def add_parser(subparser):
    """add feature filter command line argument parser"""
    filter_cmd = subparser.add_parser(
        "filter",
        help="Filter feature collection by various criteria.",
        description=filter_feature_collection.__doc__,
    )
    filter_cmd.formatter_class = argparse.RawDescriptionHelpFormatter

    # feature filter command arguments
    filter_cmd.set_defaults(func=run_filter_feature_collection)
    filter_cmd.add_argument("filter_input_file", type=str)
    filter_cmd.add_argument("filter_output_file", type=str)

    name_group = filter_cmd.add_mutually_exclusive_group()
    name_group.add_argument("-n", "--names", type=str, dest="names", nargs="+")
    name_group.add_argument(
        "--exclude-names", type=str, dest="exclude_names", nargs="+"
    )

    pid_group = filter_cmd.add_mutually_exclusive_group()
    pid_group.add_argument("-p", "--pids", type=int, dest="pids", nargs="+")
    pid_group.add_argument("--exclude-pids", type=int, dest="exclude_pids", nargs="+")

    birth_age_group = filter_cmd.add_mutually_exclusive_group()
    birth_age_group.add_argument(
        "-a", "--min-birth-age", type=float, dest="min_birth_age"
    )
    birth_age_group.add_argument("--max-birth-age", type=float, dest="max_birth_age")

    filter_cmd.add_argument(
        "--case-sensitive", dest="case_sensitive", action="store_true"
    )
    filter_cmd.add_argument("--exact-match", dest="exact_match", action="store_true")
def filter_feature_collection(feature_collection: pygplates.pygplates.FeatureCollection, filters: List[FeatureFilter])

Filter feature collection by various criteria.

See scripts/test_feature_filter.sh for usage examples.

Expand source code
def filter_feature_collection(
    feature_collection: pygplates.FeatureCollection, filters: List[FeatureFilter]
):
    """Filter feature collection by various criteria.

    See scripts/test_feature_filter.sh for usage examples.
    """
    new_feature_collection = pygplates.FeatureCollection()
    for feature in feature_collection:
        keep_flag = True
        for filter in filters:
            if not filter.should_keep(feature):
                keep_flag = False
                break
        if keep_flag:
            new_feature_collection.add(feature)
    return new_feature_collection
def run_filter_feature_collection(args)

Filter the input feature collection according to command line arguments.

Expand source code
def run_filter_feature_collection(args):
    """Filter the input feature collection according to command line arguments."""
    input_feature_collection = pygplates.FeatureCollection(args.filter_input_file)

    filters = []
    if args.names:
        filters.append(
            FeatureNameFilter(
                args.names,
                exact_match=args.exact_match,
                case_sensitive=args.case_sensitive,
            )
        )
    elif args.exclude_names:
        filters.append(
            FeatureNameFilter(
                args.exclude_names,
                exclude=True,
                exact_match=args.exact_match,
                case_sensitive=args.case_sensitive,
            )
        )

    if args.pids:
        filters.append(PlateIDFilter(args.pids))
    elif args.exclude_pids:
        filters.append(PlateIDFilter(args.exclude_pids, exclude=True))

    # print(args.max_birth_age)
    if args.max_birth_age is not None:
        filters.append(BirthAgeFilter(args.max_birth_age, keep_older=False))
    elif args.min_birth_age is not None:
        filters.append(BirthAgeFilter(args.min_birth_age))

    new_fc = filter_feature_collection(
        input_feature_collection,
        filters,
    )

    new_fc.write(args.filter_output_file)
    print(
        f"Done! The filtered feature collection has been saved to {args.filter_output_file}."
    )

Classes

class BirthAgeFilter (age: float, keep_older=True)

filter features by the time of appearance

for example: BirthAgeFilter(500) – keep features whose time of apprearance are bigger than 500 BirthAgeFilter(500, keep_older=False) – keep features whose time of apprearance are smaller than 500

:param age: the age criterion :param keep_older: if True, return True when the feature's birth age is older than the age criterion. If False, otherwise.

Expand source code
class BirthAgeFilter(FeatureFilter):
    """filter features by the time of appearance

    for example:
        BirthAgeFilter(500) -- keep features whose time of apprearance are bigger than 500
        BirthAgeFilter(500, keep_older=False) --  keep features whose time of apprearance are smaller than 500

    :param age: the age criterion
    :param keep_older: if True, return True when the feature's birth age is older than the age criterion. If False, otherwise.

    """

    def __init__(self, age: float, keep_older=True):
        self.age = age
        self.keep_older = keep_older

    def should_keep(self, feature: pygplates.Feature) -> bool:
        valid_time = feature.get_valid_time(None)
        if valid_time:
            if self.keep_older and valid_time[0] > self.age:
                return True
            if not self.keep_older and valid_time[0] < self.age:
                return True
        return False

Ancestors

Inherited members

class FeatureFilter
Expand source code
class FeatureFilter(metaclass=abc.ABCMeta):
    @classmethod
    def __subclasshook__(cls, subclass):
        return (
            hasattr(subclass, "should_keep")
            and callable(subclass.should_keep)
            or NotImplemented
        )

    @abc.abstractmethod
    def should_keep(self, feature: pygplates.Feature) -> bool:
        """This abstract method must be implemented in subclass.

        :param feature: pygplates.Feature

        :returns: true if the feature should be kept; false otherwise
        """

        raise NotImplementedError

Subclasses

Methods

def should_keep(self, feature: pygplates.pygplates.Feature) -> bool

This abstract method must be implemented in subclass.

:param feature: pygplates.Feature

:returns: true if the feature should be kept; false otherwise

Expand source code
@abc.abstractmethod
def should_keep(self, feature: pygplates.Feature) -> bool:
    """This abstract method must be implemented in subclass.

    :param feature: pygplates.Feature

    :returns: true if the feature should be kept; false otherwise
    """

    raise NotImplementedError
class FeatureNameFilter (names: List[str], exact_match=False, case_sensitive=False, exclude=False)

filter features by name

for example: FeatureNameFilter(['Africa', 'Asia']) – keep features who name contains 'Africa' or 'Asia' FeatureNameFilter(['Africa', 'Asia'], exclude=True) – keep features who name does not contain 'Africa' or 'Asia' FeatureNameFilter(['Africa', 'Asia'], exact_match=True) – keep features who name is 'Africa' or 'Asia' FeatureNameFilter(['Africa', 'Asia'], exclude=True, exact_match=True) – keep features who name is not 'Africa' or 'Asia' FeatureNameFilter(['Africa', 'Asia'], exclude=True, exact_match=True, case_sensitive=True) – keep features who name is not 'Africa' or 'Asia' (case sensitive)

Expand source code
class FeatureNameFilter(FeatureFilter):
    """filter features by name

    for example:
        FeatureNameFilter(['Africa', 'Asia']) -- keep features who name contains 'Africa' or 'Asia'
        FeatureNameFilter(['Africa', 'Asia'], exclude=True) -- keep features who name does not contain 'Africa' or 'Asia'
        FeatureNameFilter(['Africa', 'Asia'], exact_match=True) -- keep features who name is 'Africa' or 'Asia'
        FeatureNameFilter(['Africa', 'Asia'], exclude=True, exact_match=True) -- keep features who name is not 'Africa' or 'Asia'
        FeatureNameFilter(['Africa', 'Asia'], exclude=True, exact_match=True, case_sensitive=True) -- keep features who name is not 'Africa' or 'Asia' (case sensitive)
    """

    def __init__(
        self, names: List[str], exact_match=False, case_sensitive=False, exclude=False
    ):
        self.names = names
        self.exact_match = exact_match
        self.case_sensitive = case_sensitive
        self.exclude = exclude

    def check_name(self, name_1: str, name_2: str) -> bool:
        """check if two names are the same or name_2 contains name_1"""
        if not self.case_sensitive:
            name_1_tmp = name_1.lower()
            name_2_tmp = name_2.lower()
        else:
            name_1_tmp = name_1
            name_2_tmp = name_2
        if self.exact_match:
            return name_1_tmp == name_2_tmp
        else:
            return name_1_tmp in name_2_tmp

    def should_keep(self, feature: pygplates.Feature) -> bool:
        if self.exclude:
            for name in self.names:
                if self.check_name(name, feature.get_name()):
                    return False
            return True
        else:
            for name in self.names:
                if self.check_name(name, feature.get_name()):
                    return True
            return False

Ancestors

Methods

def check_name(self, name_1: str, name_2: str) -> bool

check if two names are the same or name_2 contains name_1

Expand source code
def check_name(self, name_1: str, name_2: str) -> bool:
    """check if two names are the same or name_2 contains name_1"""
    if not self.case_sensitive:
        name_1_tmp = name_1.lower()
        name_2_tmp = name_2.lower()
    else:
        name_1_tmp = name_1
        name_2_tmp = name_2
    if self.exact_match:
        return name_1_tmp == name_2_tmp
    else:
        return name_1_tmp in name_2_tmp

Inherited members

class PlateIDFilter (pids: List[int], exclude=False)

filter features by plate ID

for example: PlateIDFilter([101,201,301]) – keep features whose plate id is 101 or 201 or 301 PlateIDFilter([101,201,301], exclude=True) – keep features whose plate id is not 101 nor 201 nor 301

Expand source code
class PlateIDFilter(FeatureFilter):
    """filter features by plate ID

    for example:
        PlateIDFilter([101,201,301]) -- keep features whose plate id is 101 or 201 or 301
        PlateIDFilter([101,201,301], exclude=True) -- keep features whose plate id is not 101 nor 201 nor 301

    """

    def __init__(self, pids: List[int], exclude=False):
        self.pids = pids
        self.exclude = exclude

    def should_keep(self, feature: pygplates.Feature) -> bool:
        if not self.exclude and feature.get_reconstruction_plate_id() in self.pids:
            return True
        if self.exclude and feature.get_reconstruction_plate_id() not in self.pids:
            return True
        return False

Ancestors

Inherited members