| #!/usr/bin/env python2 |
| |
| # Copyright 2018 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """Dumps info from an ExamplePreprocessorConfig protobuf file. |
| |
| Prints feature names, types, and bucket values. |
| """ |
| |
| import os |
| import sys |
| import textwrap |
| from enum import Enum |
| from google.protobuf import text_format |
| |
class FeatureType(Enum):
  """Kinds of feature reported by this tool.

  Each member's value is the human-readable label printed next to the feature
  name.
  """
  # Feature whose index name carries a suffix but has no bucketizer entry.
  CATEGORICAL = 'categorical'
  # Feature that has an entry in the config's bucketizers.
  BUCKETED = 'bucketed'
  # Feature whose index name has no '_' separated suffix.
  SCALAR = 'scalar'
| |
| |
def ReadConfig(pb_file):
  """Parses the protobuf containing the example preprocessor config.

  Args:
    pb_file: Path to a file holding a serialized ExamplePreprocessorConfig.

  Returns:
    The ExamplePreprocessorConfig message populated from the file's contents.
  """
  # Imported here rather than at module level: Main() adds the directory that
  # holds the generated module to sys.path before this function is called.
  import example_preprocessor_pb2
  config = example_preprocessor_pb2.ExamplePreprocessorConfig()
  # A serialized protobuf is binary data. Text mode may translate newline bytes
  # on Windows and yields str instead of bytes on Python 3, so read it raw.
  with open(pb_file, 'rb') as pb:
    config.ParseFromString(pb.read())
  return config
| |
| |
def PrintExamplePreprocessorConfig(pb_file):
  """Prints the features listed in the example preprocessor config.

  Each distinct base feature name is printed once as "* <name> (<type>)".
  Bucketed features are followed by their bucket boundaries, formatted with one
  decimal place, tab-indented and wrapped to fit an 80 column terminal.

  Args:
    pb_file: Path to the serialized ExamplePreprocessorConfig protobuf.

  Returns:
    0 once every feature has been printed.
  """
  config = ReadConfig(pb_file)

  features = set()
  for feature_index in sorted(config.feature_indices):
    # For string or string list feature types, remove the "_value" suffix to get
    # the base name.
    name_parts = feature_index.split('_')
    base_name = name_parts[0]
    # Skip additional values of the same base name.
    if base_name in features:
      continue

    features.add(base_name)
    if len(name_parts) == 1:
      # No suffix at all: the index name is the feature itself.
      feature_type = FeatureType.SCALAR
    elif base_name in config.bucketizers:
      feature_type = FeatureType.BUCKETED
    else:
      feature_type = FeatureType.CATEGORICAL
    description = '* %s (%s)' % (base_name, feature_type.value)

    if feature_type == FeatureType.BUCKETED:
      description += ':\n\t'
      boundaries = config.bucketizers[base_name].boundaries
      bucket_str = ', '.join('%.1f' % bucket for bucket in boundaries)

      # Indent description by a tab and wrap text.
      max_len = 80 - 8  # Leave at least 8 columns for tab width.
      description += '\n\t'.join(textwrap.wrap(bucket_str, max_len))
    # Single-argument parenthesized form: prints the same under Python 2 and is
    # also valid Python 3, unlike the bare print statement.
    print(description)
  return 0
| |
| |
def Main(args):
  """Validates the command-line arguments and prints the config's features.

  Args:
    args: Command-line arguments without the program name; expected to be
      [out_dir, path/to/example_preprocessor_config.pb].

  Returns:
    1 if the argument count is wrong or a required file or directory is
    missing; otherwise the status returned by PrintExamplePreprocessorConfig.
  """
  # Messages use the single-argument parenthesized print form, which behaves
  # identically under Python 2 and is also valid Python 3.
  if len(args) != 2:
    print('Usage: %s <out_dir> <path/to/example_preprocessor_config.pb>' %
          __file__)
    return 1

  out_dir = args[0]
  if not os.path.isdir(out_dir):
    print('Could not find out directory: %s' % out_dir)
    return 1

  pb_file = args[1]
  if not os.path.isfile(pb_file):
    print('Protobuf file not found: %s' % pb_file)
    return 1

  proto_dir = os.path.join(out_dir, 'pyproto/components/assist_ranker/proto')
  if not os.path.isdir(proto_dir):
    print('Proto directory not found: %s' % proto_dir)
    print('Build the "components/assist_ranker/proto" target')
    print('  (usually built with chrome)')
    return 1

  # Allow importing the ExamplePreprocessorConfig proto definition.
  sys.path.insert(0, proto_dir)
  # Propagate the printer's status so the success path returns an explicit
  # exit code instead of an implicit None.
  return PrintExamplePreprocessorConfig(pb_file)
| |
| |
if __name__ == '__main__':
  # Run the tool on the command-line arguments (program name excluded) and use
  # Main's return value as the process exit status.
  exit_status = Main(sys.argv[1:])
  sys.exit(exit_status)