Source code for mtap.processing.descriptions

# Copyright 2019 Regents of the University of Minnesota.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Descriptors for processor functionality."""
import typing
from typing import NamedTuple, Optional, List, Type, Dict, Any

if typing.TYPE_CHECKING:
    from mtap import EventProcessor

__all__ = [
    'label_property',
    'labels',
    'parameter',
    'processor',
]


[docs]class label_property(NamedTuple('label_property', [('name', str), ('description', Optional[str]), ('data_type', Optional[str]), ('nullable', bool)])): """Creates a description for a property on a label. Args: name (str): The property's name. Keyword Args: description (~typing.Optional[str]): A short description of the property. data_type (~typing.Optional[str]): The data type of the property: str, float, or bool; List[T] or Mapping[T1, T2] of those. nullable (bool): Whether the property can have a valid value of null. """ def __new__(cls, name: str, *, nullable: bool = False, description: Optional[str] = None, data_type: Optional[str] = None): return super().__new__(cls, name, description, data_type, nullable)
[docs]class labels(NamedTuple('LabelDescription', [('name', str), ('reference', Optional[str]), ('name_from_parameter', Optional[str]), ('optional', bool), ('description', Optional[str]), ('properties', List[label_property])])): """A description for a label type. Args: name (str): The label index name. Keyword Args: reference (~typing.Optional[str]): If this is an output of another processor, that processor's name followed by a slash and the default output name of the index go here. Example: "sentence-detector/sentences". optional (bool): Whether this label index is an optional input or output. name_from_parameter (~typing.Optional[str]): If the label index gets its name from a processor parameter, the name of the parameter. description (~typing.Optional[str]): A short description of the label index. properties (~typing.Optional[~typing.List[label_property]]): The properties of the labels in the label index. Attributes: name (str): The label index name. reference (~typing.Optional[str]): If this is an output of another processor, that processor's name followed by a slash and the default output name of the index go here. Example: "sentence-detector/sentences". optional (bool): Whether this label index is an optional input or output. name_from_parameter (~typing.Optional[str]): If the label index gets its name from a processor parameter, the name of the parameter. description (~typing.Optional[str]): A short description of the label index. properties (~typing.Optional[~typing.List[label_property]]): The properties of the labels in the label index. """ def __new__(cls, name: str, *, reference: Optional[str] = None, optional: bool = False, name_from_parameter: Optional[str] = None, description: Optional[str] = None, properties: Optional[List[label_property]] = None): return super().__new__(cls, name, reference, name_from_parameter, optional, description, properties)
[docs]class parameter(NamedTuple('parameter', [('name', str), ('description', Optional[str]), ('data_type', Optional[str]), ('required', bool)])): def __new__(cls, name: str, *, required: bool = False, data_type: Optional[str] = None, description: Optional[str] = None): """A description of one of the processor's parameters. Args: name (str): The parameter name / key. Keyword Args: required (bool): Whether the processor parameter is required. data_type (~typing.Optional[str]): The data type of the parameter. str, float, or bool; List[T] or Mapping[T1, T2] of those. description (~typing.Optional[str]): A short description of the property and what it does. Attributes: name (str): The parameter name / key. required (bool): Whether the processor parameter is required. data_type (~typing.Optional[str]): The data type of the parameter. str, float, or bool; List[T] or Mapping[T1, T2] of those. description (~typing.Optional[str]): A short description of the property and what it does. Returns: ParameterDescription: The parameter description object """ return super().__new__(cls, name=name, description=description, data_type=data_type, required=required)
[docs]def processor(name: str, *, human_name: Optional[str] = None, description: Optional[str] = None, parameters: Optional[List[parameter]] = None, inputs: Optional[List[labels]] = None, outputs: Optional[List[labels]] = None, **additional_metadata: str): """Decorator which attaches a service name and metadata to a processor. Which then can be used for runtime reflection of how the processor works. Args: name (str): Identifying service name both for launching via command line and for service registration. Should be a mix of alphanumeric characters and dashes so that it plays nice with the DNS name requirements of service discovery tools like Consul. Can be overridden at runtime via the `identifier` option on :func:`processor_parser`. Keyword Args: human_name (~typing.Optional[str]): An option human name for the processor. description (~typing.Optional[str]): A short description of the processor and what it does. parameters (~typing.Optional[~typing.List[ParameterDescription]]): The processor's parameters. inputs (~typing.Optional[~typing.List[str]]): String identifiers for the output from a processor that this processor uses as an input. Takes the format "[processor-name]/[output]". Examples would be "tagger:pos_tags" or "sentence-detector:sentences". outputs (~typing.Optional[~typing.List[LabelDescription]]): The label indices this processor outputs. **additional_metadata (~typing.Any): Any other data that should be added to the processor's metadata, should be serializable to yaml and json. Returns: A decorator to be applied to instances of EventProcessor or DocumentProcessor. This decorator attaches the metadata so it can be reflected at runtime. Examples: >>> from mtap.processing import EventProcessor >>> @processor('example-text-converter') >>> class TextConverter(EventProcessor): >>> ... or >>> from mtap.processing import DocumentProcessor >>> @processor('example-sentence-detector') >>> class SentenceDetector(DocumentProcessor): >>> ... From our own example processor: >>> from mtap.processing import DocumentProcessor >>> @processor('mtap-example-processor-python', >>> human_name="Python Example Processor", >>> description="counts the number of times the letters a and b occur in a document", >>> parameters=[ >>> parameter('do_work', required=True, data_type='bool', >>> description="Whether the processor should do anything.") >>> ], >>> outputs=[ >>> labels('mtap.examples.letter_counts', >>> properties=[label_property('letter', data_type='str'), >>> label_property('count', data_type='int')]) >>> ]) >>> class ExampleProcessor(DocumentProcessor): >>> ... """ def decorator(f: Type['EventProcessor']) -> Type['EventProcessor']: f.metadata = { 'name': name, 'human_name': human_name, 'description': description } if parameters is not None: f.metadata['parameters'] = [_parameter_to_map(p) for p in parameters] if inputs is not None: f.metadata['inputs'] = [_desc_to_dict(desc) for desc in inputs] if outputs is not None: f.metadata['outputs'] = [_desc_to_dict(desc) for desc in outputs] if additional_metadata is not None: f.metadata.update(additional_metadata) if 'implementation_lang' not in f.metadata: f.metadata['implementation_lang'] = 'Python' return f return decorator
def _desc_to_dict(description: labels) -> dict: m = { 'name': description.name } if description.name_from_parameter is not None: m['name_from_parameter'] = description.name_from_parameter if description.reference is not None: m['reference'] = description.reference if description.optional is not None: m['optional'] = description.optional if description.description is not None: m['description'] = description.description if description.properties is not None: m['properties'] = [_prop_to_dict(p) for p in description.properties] return m def _prop_to_dict(p: label_property) -> Dict[str, Any]: m = { 'name': p.name } if p.description is not None: m['description'] = p.description if p.data_type is not None: m['data_type'] = p.data_type if p.nullable is not None: m['nullable'] = p.nullable return m def _parameter_to_map(p: parameter) -> Dict[str, Any]: m = { 'name': p.name } if p.description is not None: m['description'] = p.description if p.data_type is not None: m['data_type'] = p.data_type if p.required is not None: m['required'] = p.required return m