Source code for sourced.ml.core.extractors.identifier_distance
from typing import Iterable, Tuple
import bblfsh.compat as bblfsh
from sourced.ml.core.algorithms.token_parser import NoopTokenParser
from sourced.ml.core.algorithms.uast_id_distance import (Uast2IdDistance, Uast2IdLineDistance,
Uast2IdTreeDistance)
from sourced.ml.core.extractors.bags_extractor import BagsExtractor
[docs]class IdentifierDistance(BagsExtractor):
"""
Extractor wrapper for the Uast2IdTreeDistance and Uast2IdLineDistance algorithms.
Note that this is an unusual BagsExtractor since it returns iterable instead of bag.
The class is not wrapped with @register_extractor because it does not produce bags as the others do.
So no outside code will see it or use it directly.
For the same reason we are free to override NAMESPACE, NAME, OPTS fields with any value we want.
TODO(zurk): Split BagsExtractor into two classes: Extractor and BagsExtractor(Extractor),
re-inherit this class from Extractor, delete explanations from docstring.
"""
# Display name of this extractor.
[docs] NAME = "Identifier distance"
# Default maximum distance, reused from Uast2IdDistance; serves as the default
# value of the `max_distance` constructor argument.
[docs] DEFAULT_MAX_DISTANCE = Uast2IdDistance.DEFAULT_MAX_DISTANCE
[docs] class DistanceType:
@staticmethod
def resolve(type):
    """
    Map a distance type identifier to the algorithm class that implements it.

    :param type: Distance type identifier; compared against DistanceType.Line and \
                 DistanceType.Tree. The name shadows the builtin but is kept so that \
                 existing keyword callers keep working.
    :return: Uast2IdLineDistance for the line type, Uast2IdTreeDistance for the tree type.
    :raises ValueError: If `type` matches neither of the known distance types.
    """
    if type == IdentifierDistance.DistanceType.Line:
        return Uast2IdLineDistance
    if type == IdentifierDistance.DistanceType.Tree:
        return Uast2IdTreeDistance
    # Wrap the value in a 1-tuple: the % operator unpacks a bare tuple argument, which
    # would raise TypeError (or format only one element) instead of the intended ValueError.
    raise ValueError("Unknown distance type: %s" % (type,))
def __init__(self, split_stem=False, type="tree", max_distance=DEFAULT_MAX_DISTANCE, **kwargs):
    """
    Set up the identifier distance algorithm selected by `type`.

    :param split_stem: When falsy, a NoopTokenParser instance is handed to the algorithm; \
                       when truthy, `token_parser=None` is passed instead.
    :param type: Distance type identifier, resolved through DistanceType.resolve.
    :param max_distance: Forwarded to the algorithm as `max_distance`.
    :param kwargs: Forwarded unchanged to the parent class constructor.
    """
    super().__init__(**kwargs)
    # Resolve first so that an unknown type fails before anything else is constructed.
    algorithm_class = self.DistanceType.resolve(type)
    if split_stem:
        parser = None
    else:
        parser = NoopTokenParser()
    self.uast2id_distance = algorithm_class(token_parser=parser, max_distance=max_distance)