summaryrefslogtreecommitdiffstats
path: root/cmdline/detail/cmdline_parse_argument_detail.h
blob: 4b56804ea6ec9e6b49d7bdfc23115bc9b59d2bd7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_CMDLINE_DETAIL_CMDLINE_PARSE_ARGUMENT_DETAIL_H_
#define ART_CMDLINE_DETAIL_CMDLINE_PARSE_ARGUMENT_DETAIL_H_

#include <type_traits>
#include <assert.h>
#include <functional>
#include <vector>
#include <algorithm>
#include <numeric>
#include <memory>

#include "cmdline_parse_result.h"
#include "cmdline_types.h"
#include "token_range.h"
#include "unit.h"

namespace art {
  // Implementation details for the parser. Do not look inside if you hate templates.
  namespace detail {
    // Type-erased interface implemented by every typed argument parser.
    //
    // The general parser drives arguments exclusively through this non-templated base,
    // so it never needs to know the concrete argument type at compile time.
    struct CmdlineParseArgumentAny {
      virtual ~CmdlineParseArgumentAny() = default;

      // Try to parse this argument from the tokens in 'arguments'.
      // On success the parsed value is stored as a side-effect.
      //
      // Most of the time the input simply is not this argument: kUnknown is returned,
      // no tokens are consumed and the caller's position should not be advanced.
      //
      // Parsing can also fail validation after the leading token already matched
      // (for example an out of range value, or a string where an int was expected).
      // The tokens still count as consumed in that case, and the caller's position
      // is expected to move past all of them.
      //
      // The number of tokens consumed by the attempt is written to the out-parameter
      // consumed_tokens; the caller should skip that many tokens before parsing the
      // next argument.
      virtual CmdlineResult ParseArgument(const TokenRange& arguments, size_t* consumed_tokens) = 0;

      // How many argv tokens this argument occupies.
      // For example "--help" is just 1, "-compiler-option _" would be 2 (since there's a space).
      //
      // Returned as a [min,max] range because one argument may have several definitions
      // with different token counts (e.g. {"-h", "-h " } would return [1,2]).
      virtual std::pair<size_t, size_t> GetNumTokens() const = 0;

      // Run-time name of the argument's value type.
      virtual const char* GetTypeName() const = 0;

      // Cheap token-by-token pre-match; returns how many tokens lined up with this
      // argument definition. More tokens is better, 0 means no match.
      //
      // Tokens containing a wildcard are only compared up to the wildcard, so a
      // non-zero result is not a guarantee: the full wildcard match performed later
      // can still fail. Treat it as a strong hint that the user-provided input was
      // *probably* meant for this argument.
      //
      // NOTE(review): the previous comment ended with the fragment "If the Size()
      // tokens are returned."; presumably a result equal to the definition's token
      // count means every token lined up -- confirm against TokenRange::MaybeMatches.
      virtual size_t MaybeMatches(const TokenRange& tokens) = 0;
    };

    // SFINAE helper: has a nested 'type' (void) only when T is an arithmetic type.
    template <typename T>
    using EnableIfNumeric = std::enable_if<std::is_arithmetic<T>::value, void>;

    // SFINAE helper: has a nested 'type' (void) only when T is NOT an arithmetic type.
    template <typename T>
    using DisableIfNumeric = std::enable_if<!std::is_arithmetic<T>::value, void>;

    // Argument definition information, created by an ArgumentBuilder and an UntypedArgumentBuilder.
    template <typename TArg>
    struct CmdlineParserArgumentInfo {
      // Range check for arithmetic TArg (the only types guaranteed to have operator<=).
      //
      // Returns true when no range was configured, or when min_ <= value <= max_.
      template <typename T = TArg>  // Necessary to get SFINAE to kick in.
      bool CheckRange(const TArg& value, typename EnableIfNumeric<T>::type* = 0) {
        // Short-circuits: min_/max_ are only read when a range has been set.
        return !has_range_ || (min_ <= value && value <= max_);
      }

      // Range check for non-arithmetic TArg: there is nothing to compare, so every
      // value is accepted. Having a range configured for such a type is a
      // programming error and is caught by the assert.
      template <typename T = TArg>
      bool CheckRange(const TArg& /* value */, typename DisableIfNumeric<T>::type* = 0) {
        assert(!has_range_);
        return true;
      }

      // Quick token-by-token pre-match against every name of this argument.
      // Tokens that contain a wildcard are only compared on the prefix before it.
      //
      // A non-zero result is merely a strong hint that the user-provided input was
      // *probably* meant for this argument; the full wildcard match done later can
      // still fail.
      //
      // Returns the token count of the closest matching name (0 if none matched).
      size_t MaybeMatches(TokenRange token_list) const {
        return FindClosestMatch(token_list).second;
      }

      // Find the name whose tokens line up best with token_list (see MaybeMatches).
      //
      // Returns {pointer to the closest tokenized name, number of tokens it matched}.
      // When nothing matches at all the result is {nullptr, 0}.
      std::pair<const TokenRange*, size_t> FindClosestMatch(TokenRange token_list) const {
        // Start with "no candidate": null definition, zero matched tokens.
        std::pair<const TokenRange*, size_t> closest(nullptr, 0u);

        for (auto it = tokenized_names_.begin(); it != tokenized_names_.end(); ++it) {
          const size_t matched = it->MaybeMatches(token_list, std::string("_"));

          // Strictly greater: on ties the earliest definition is kept.
          if (matched > closest.second) {
            closest.first = &*it;
            closest.second = matched;
          }
        }

        return closest;
      }

      // Mark the argument definition as completed, do not mutate the object anymore after this
      // call is done.
      //
      // Performs several sanity checks (all via assert) on names_ and the configured
      // value map / value list, and derives tokenized_names_, simple_names_,
      // using_blanks_ and token_range_size_ from names_.
      void CompleteArgument() {
        assert(names_.size() >= 1);
        assert(!is_completed_);

        is_completed_ = true;

        // Number of names containing at least one blank (_) / at least one space.
        size_t blank_count = 0;
        size_t token_count = 0;

        // Most recently seen non-zero per-name blank/space count; every later name
        // is checked against it so that all names agree.
        size_t global_blank_count = 0;
        size_t global_token_count = 0;
        for (auto&& name : names_) {
          std::string s(name);

          size_t local_blank_count = std::count(s.begin(), s.end(), '_');
          size_t local_token_count = std::count(s.begin(), s.end(), ' ');

          if (global_blank_count != 0) {
            assert(local_blank_count == global_blank_count
                   && "Every argument descriptor string must have same amount of blanks (_)");
          }

          if (local_blank_count != 0) {
            global_blank_count = local_blank_count;
            blank_count++;

            assert(local_blank_count == 1 && "More than one blank is not supported");
            assert(s.back() == '_' && "The blank character must only be at the end of the string");
          }

          if (global_token_count != 0) {
            assert(local_token_count == global_token_count
                   && "Every argument descriptor string must have same amount of tokens (spaces)");
          }

          if (local_token_count != 0) {
            global_token_count = local_token_count;
            token_count++;
          }
        }

        // Tokenize every name, turning it from a string to a token list.
        // This is done once here, after validation; it does not depend on the
        // per-name counters above, so rebuilding it for every validated name
        // would only repeat the same work.
        tokenized_names_.clear();
        for (auto&& name : names_) {
          // Split along ' ' only, removing any duplicated spaces.
          tokenized_names_.push_back(
              TokenRange::Split(name, {' '}).RemoveToken(" "));
        }

        // remove the _ character from each of the token ranges
        // we will often end up with an empty token (i.e. ["-XX", "_"] -> ["-XX", ""]
        // and this is OK because we still need an empty token to simplify
        // range comparisons
        simple_names_.clear();
        for (auto&& tokenized_name : tokenized_names_) {
          simple_names_.push_back(tokenized_name.RemoveCharacter('_'));
        }

        if (token_count != 0) {
          assert(("Every argument descriptor string must have equal amount of tokens (spaces)" &&
              token_count == names_.size()));
        }

        if (blank_count != 0) {
          assert(("Every argument descriptor string must have an equal amount of blanks (_)" &&
              blank_count == names_.size()));
        }

        using_blanks_ = blank_count > 0;
        {
          // The accumulator type of std::accumulate is the type of the initial value,
          // so seed with size_t to avoid folding size_t results through unsigned int.
          size_t smallest_name_token_range_size =
              std::accumulate(tokenized_names_.begin(), tokenized_names_.end(),
                              ~static_cast<size_t>(0),
                              [](size_t min, const TokenRange& cur) {
                                return std::min(min, cur.Size());
                              });
          size_t largest_name_token_range_size =
              std::accumulate(tokenized_names_.begin(), tokenized_names_.end(),
                              static_cast<size_t>(0),
                              [](size_t max, const TokenRange& cur) {
                                return std::max(max, cur.Size());
                              });

          token_range_size_ = std::make_pair(smallest_name_token_range_size,
                                             largest_name_token_range_size);
        }

        if (has_value_list_) {
          assert(names_.size() == value_list_.size()
                 && "Number of arg descriptors must match number of values");
          assert(!has_value_map_);
        }
        if (has_value_map_) {
          if (!using_blanks_) {
            assert(names_.size() == value_map_.size() &&
                   "Since no blanks were specified, each arg is mapped directly into a mapped "
                   "value without parsing; sizes must match");
          }

          assert(!has_value_list_);
        }

        if (!using_blanks_ && !CmdlineType<TArg>::kCanParseBlankless) {
          assert((has_value_map_ || has_value_list_) &&
                 "Arguments without a blank (_) must provide either a value map or a value list");
        }

        TypedCheck();
      }

      // List of aliases for a single argument definition, e.g. {"-Xdex2oat", "-Xnodex2oat"}.
      std::vector<const char*> names_;
      // Is there at least 1 wildcard '_' in the argument definition?
      bool using_blanks_ = false;
      // [min, max] token counts in each arg def
      std::pair<size_t, size_t> token_range_size_;

      // contains all the names in a tokenized form, i.e. as a space-delimited list
      std::vector<TokenRange> tokenized_names_;

      // contains the tokenized names, but with the _ character stripped
      std::vector<TokenRange> simple_names_;

      // For argument definitions created with '.AppendValues()'
      // Meaning that parsing should mutate the existing value in-place if possible.
      bool appending_values_ = false;

      // For argument definitions created with '.WithRange(min, max)'
      bool has_range_ = false;
      TArg min_;
      TArg max_;

      // For argument definitions created with '.WithValueMap'
      bool has_value_map_ = false;
      std::vector<std::pair<const char*, TArg>> value_map_;

      // For argument definitions created with '.WithValues'
      bool has_value_list_ = false;
      std::vector<TArg> value_list_;

      // Make sure there's a default constructor.
      CmdlineParserArgumentInfo() = default;

      // Ensure there's a default move constructor.
      CmdlineParserArgumentInfo(CmdlineParserArgumentInfo&&) = default;

     private:
      // Perform type-specific checks at runtime.
      template <typename T = TArg>
      void TypedCheck(typename std::enable_if<std::is_same<Unit, T>::value>::type* = 0) {
        assert(!using_blanks_ &&
               "Blanks are not supported in Unit arguments; since a Unit has no parse-able value");
      }

      void TypedCheck() {}

      bool is_completed_ = false;
    };

    // A virtual-implementation of the necessary argument information in order to
    // be able to parse arguments.
    template <typename TArg>
    struct CmdlineParseArgument : CmdlineParseArgumentAny {
      // Takes ownership of the argument definition and of the two callbacks:
      // save_argument stores a parsed value, load_argument returns a reference to
      // the existing value (used when values are appended in place).
      CmdlineParseArgument(CmdlineParserArgumentInfo<TArg>&& argument_info,
                           std::function<void(TArg&)>&& save_argument,
                           std::function<TArg&(void)>&& load_argument)
          : argument_info_(std::move(argument_info)),
            save_argument_(std::move(save_argument)),
            load_argument_(std::move(load_argument)) {}

      using UserTypeInfo = CmdlineType<TArg>;

      // Parse this argument from 'arguments' and store the result.
      //
      // Picks the closest matching definition, reports its token count through
      // consumed_tokens, then either parses the joined tokens directly (definitions
      // without a blank) or extracts the text matched by the wildcard (_) and
      // parses that.
      //
      // Returns kUnknown if no definition lines up with the input, kFailure if a
      // token does not match its definition, otherwise the result of
      // ParseArgumentSingle. consumed_tokens is only written once a definition
      // has been selected.
      virtual CmdlineResult ParseArgument(const TokenRange& arguments, size_t* consumed_tokens) {
        assert(arguments.Size() > 0);
        assert(consumed_tokens != nullptr);

        auto closest_match_res = argument_info_.FindClosestMatch(arguments);
        size_t best_match_size = closest_match_res.second;
        const TokenRange* best_match_arg_def = closest_match_res.first;

        if (best_match_size > arguments.Size()) {
          // The best match has more tokens than were provided.
          // Shouldn't happen in practice since the outer parser does this check.
          return CmdlineResult(CmdlineResult::kUnknown, "Size mismatch");
        }

        assert(best_match_arg_def != nullptr);
        if (best_match_arg_def == nullptr) {
          // Asserts are compiled out of release builds; never dereference a null
          // definition when nothing matched.
          return CmdlineResult(CmdlineResult::kUnknown, "No matching argument definition");
        }
        *consumed_tokens = best_match_arg_def->Size();

        if (!argument_info_.using_blanks_) {
          return ParseArgumentSingle(arguments.Join(' '));
        }

        // Extract out the blank value from arguments
        // e.g. for a def of "foo:_" and input "foo:bar", blank_value == "bar"
        std::string blank_value;
        size_t idx = 0;
        for (auto&& def_token : *best_match_arg_def) {
          if (idx >= arguments.Size()) {
            // The definition has more tokens than the user supplied; fail instead
            // of indexing past the end of 'arguments'.
            return CmdlineResult(CmdlineResult::kFailure,
                                 std::string("Failed to parse ") + best_match_arg_def->GetToken(0)
                                 + ": missing token " + std::to_string(idx));
          }

          auto&& arg_token = arguments[idx];

          // Does this definition-token have a wildcard in it?
          if (def_token.find('_') == std::string::npos) {
            // No, regular token. Match 1:1 against the argument token.
            bool token_match = def_token == arg_token;

            if (!token_match) {
              return CmdlineResult(CmdlineResult::kFailure,
                                   std::string("Failed to parse ") + best_match_arg_def->GetToken(0)
                                   + " at token " + std::to_string(idx));
            }
          } else {
            // This is a wild-carded token.
            TokenRange def_split_wildcards = TokenRange::Split(def_token, {'_'});

            // Extract the wildcard contents out of the user-provided arg_token.
            std::unique_ptr<TokenRange> arg_matches =
                def_split_wildcards.MatchSubstrings(arg_token, "_");
            if (arg_matches == nullptr) {
              return CmdlineResult(CmdlineResult::kFailure,
                                   std::string("Failed to parse ") + best_match_arg_def->GetToken(0)
                                   + ", with a wildcard pattern " + def_token
                                   + " at token " + std::to_string(idx));
            }

            // Get the corresponding wildcard tokens from arg_matches,
            // and concatenate it to blank_value.
            for (size_t sub_idx = 0;
                sub_idx < def_split_wildcards.Size() && sub_idx < arg_matches->Size(); ++sub_idx) {
              if (def_split_wildcards[sub_idx] == "_") {
                blank_value += arg_matches->GetToken(sub_idx);
              }
            }
          }

          ++idx;
        }

        return ParseArgumentSingle(blank_value);
      }

     private:
      // Turn the textual value of this argument into a TArg and store it.
      //
      // 'argument' is either the joined input tokens (definitions without a blank)
      // or the text captured by the wildcard. It is resolved, in order, through:
      //   1. the value map ('WithValueMap'), by exact name;
      //   2. the value list ('WithValues'), by position of the matching name;
      //   3. the CmdlineType<TArg> parser, optionally appending to the existing
      //      value or range-checking the result.
      //
      // Returns kSuccess once the value has been saved, kFailure when the text is
      // not one of the allowed map/list values, an out-of-range result when the
      // range check fails, or the type parser's own error.
      virtual CmdlineResult ParseArgumentSingle(const std::string& argument) {
        // TODO: refactor to use LookupValue for the value lists/maps

        // Handle the 'WithValueMap(...)' argument definition
        if (argument_info_.has_value_map_) {
          for (auto&& value_pair : argument_info_.value_map_) {
            const char* name = value_pair.first;

            if (argument == name) {
              return SaveArgument(value_pair.second);
            }
          }

          // Error case: Fail, telling the user what the allowed values were.
          std::vector<std::string> allowed_values;
          for (auto&& value_pair : argument_info_.value_map_) {
            const char* name = value_pair.first;
            allowed_values.push_back(name);
          }

          std::string allowed_values_flat = Join(allowed_values, ',');
          return CmdlineResult(CmdlineResult::kFailure,
                               "Argument value '" + argument + "' does not match any of known valid "
                               "values: {" + allowed_values_flat + "}");
        }

        // Handle the 'WithValues(...)' argument definition
        if (argument_info_.has_value_list_) {
          // names_[i] is the spelling that selects value_list_[i].
          size_t arg_def_idx = 0;
          for (auto&& value : argument_info_.value_list_) {
            auto&& arg_def_token = argument_info_.names_[arg_def_idx];

            if (arg_def_token == argument) {
              return SaveArgument(value);
            }
            ++arg_def_idx;
          }

          // The loop ran to completion, so the index equals the number of values.
          assert(arg_def_idx == argument_info_.value_list_.size() &&
                 "Number of named argument definitions must match number of values defined");

          // Error case: Fail, telling the user what the allowed values were.
          std::vector<std::string> allowed_values;
          for (auto&& arg_name : argument_info_.names_) {
            allowed_values.push_back(arg_name);
          }

          std::string allowed_values_flat = Join(allowed_values, ',');
          return CmdlineResult(CmdlineResult::kFailure,
                               "Argument value '" + argument + "' does not match any of known valid "
                               "values: {" + allowed_values_flat + "}");
        }

        // Handle the regular case where we parsed an unknown value from a blank.
        UserTypeInfo type_parser;

        if (argument_info_.appending_values_) {
          // The existing value is updated in place through the reference, so
          // there is nothing to hand to SaveArgument afterwards.
          TArg& existing = load_argument_();
          CmdlineParseResult<TArg> result = type_parser.ParseAndAppend(argument, existing);

          assert(!argument_info_.has_range_);

          return result;
        }

        CmdlineParseResult<TArg> result = type_parser.Parse(argument);

        if (result.IsSuccess()) {
          TArg& value = result.GetValue();

          // Do a range check for 'WithRange(min,max)' argument definition.
          if (!argument_info_.CheckRange(value)) {
            return CmdlineParseResult<TArg>::OutOfRange(
                value, argument_info_.min_, argument_info_.max_);
          }

          return SaveArgument(value);
        }

        // Some kind of type-specific parse error. Pass the result as-is.
        CmdlineResult raw_result = std::move(result);
        return raw_result;
      }

     public:
      virtual const char* GetTypeName() const {
        // TODO: Obviate the need for each type specialization to hardcode the type name
        return UserTypeInfo::Name();
      }

      // Number of argv tokens this argument occupies, as a [min,max] range over all
      // of its definitions.
      // For example "--help" is just 1, "-compiler-option _" would be 2 (since there's a space),
      // and {"-h", "-h " } would return [1,2].
      //
      // The range is the one stored in the argument definition's token_range_size_.
      virtual std::pair<size_t, size_t> GetNumTokens() const {
        const std::pair<size_t, size_t>& token_bounds = argument_info_.token_range_size_;
        return token_bounds;
      }

      // See if this token range might begin the same as the argument definition.
      // Returns the number of tokens matched by the closest definition (0 if none).
      virtual size_t MaybeMatches(const TokenRange& tokens) {
        return argument_info_.FindClosestMatch(tokens).second;
      }

     private:
      // Store a fully parsed value through the save_argument_ callback.
      // Must not be used for appending arguments. Always returns kSuccess.
      CmdlineResult SaveArgument(const TArg& value) {
        assert(!argument_info_.appending_values_
               && "If the values are being appended, then the updated parse value is "
                   "updated by-ref as a side effect and shouldn't be stored directly");

        // The callback takes a non-const reference, so give it a private copy.
        TArg mutable_copy = value;
        save_argument_(mutable_copy);

        CmdlineResult success(CmdlineResult::kSuccess);
        return success;
      }

      CmdlineParserArgumentInfo<TArg> argument_info_;
      std::function<void(TArg&)> save_argument_;
      std::function<TArg&(void)> load_argument_;
    };
  } // namespace detail // NOLINT [readability/namespace] [5] [whitespace/comments] [2]
}  // namespace art

#endif  // ART_CMDLINE_DETAIL_CMDLINE_PARSE_ARGUMENT_DETAIL_H_