Source code for sourced.ml.core.extractors.id_sequence
from typing import Iterable
import bblfsh.compat as bblfsh
from sourced.ml.core.algorithms.token_parser import NoopTokenParser
from sourced.ml.core.algorithms.uast_to_id_sequence import Uast2IdSequence
from sourced.ml.core.extractors.bags_extractor import BagsExtractor
class IdSequenceExtractor(BagsExtractor):
    """
    Extractor wrapper for the Uast2IdSequence algorithm.

    Note that this is an unusual BagsExtractor since it returns an iterable instead of a bag.
    The class is not wrapped with @register_extractor because it does not produce bags as
    the others do, so no outside code will see it or use it directly.
    For the same reason we are free to override the NAMESPACE, NAME, OPTS fields with any
    value we want.

    TODO(zurk): Split BagsExtractor into two classes: Extractor and BagsExtractor(Extractor),
    re-inherit this class from Extractor, delete explanations from docstring.
    """

    def __init__(self, split_stem=False, **kwargs):
        """
        Build the underlying Uast2IdSequence converter.

        :param split_stem: When falsy, a NoopTokenParser instance is handed to \
                           Uast2IdSequence; when truthy, None is handed over instead \
                           (presumably selecting the converter's own default token \
                           parser -- confirm against Uast2IdSequence).
        :param kwargs: Forwarded unchanged to BagsExtractor.__init__.
        """
        super().__init__(**kwargs)
        token_parser = None if split_stem else NoopTokenParser()
        # Both arguments are passed positionally, exactly as the converter is invoked
        # elsewhere; the first one is always None here.
        self.uast2id_sequence = Uast2IdSequence(None, token_parser)

    def extract(self, uast: bblfsh.Node) -> Iterable[tuple]:
        """
        Convert a UAST into its identifier sequence.

        :param uast: Root node of the UAST to process.
        :return: Generator which yields exactly one pair: the result of calling \
                 `self.uast2id_sequence` on `uast`, followed by None.
        """
        # The second element is always None; presumably it is a placeholder which keeps
        # the (key, value) shape other extractors produce -- confirm against callers.
        yield self.uast2id_sequence(uast), None