summaryrefslogtreecommitdiffstats
path: root/include/minikin/WordBreaker.h
blob: 8c0050236e2556f1e1074a6c86d6d2a4bb383d1e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * A wrapper around ICU's line break iterator that provides customized line
 * break opportunities and identifies words for the purpose of
 * hyphenation.
 */

#ifndef MINIKIN_WORD_BREAKER_H
#define MINIKIN_WORD_BREAKER_H

#include "unicode/brkiter.h"
#include <memory>

namespace android {

/**
 * Iterates over the word/line break opportunities of a UTF-16 text buffer and
 * reports the bounds of the word preceding each break.
 *
 * Only declarations are visible here; the member functions are defined
 * elsewhere. The std::unique_ptr member makes the class non-copyable, and the
 * user-declared destructor suppresses the implicit move operations, so a
 * WordBreaker can be neither copied nor moved.
 *
 * All scalar state is given an in-class default so that a freshly constructed
 * object never exposes indeterminate values (e.g. through current()) before
 * setText() has been called.
 */
class WordBreaker {
public:
    // Calls finish() so that whatever finish() releases is also released on destruction.
    ~WordBreaker() {
        finish();
    }

    // Select the locale to use for breaking.
    // NOTE(review): presumably (re)creates mBreakIterator for this locale - confirm in the
    // implementation file.
    void setLocale(const icu::Locale& locale);

    // Set the UTF-16 text to iterate over; size is the number of uint16_t units in data.
    // NOTE(review): mText is a raw pointer, so the buffer is presumably borrowed rather than
    // copied and must outlive the iteration - confirm in the implementation file.
    void setText(const uint16_t* data, size_t size);

    // Advance iterator to next word break. Return offset, or -1 if EOT
    ssize_t next();

    // Current offset of iterator, equal to 0 at BOT or last return from next()
    ssize_t current() const;

    // After calling next(), wordStart() and wordEnd() are offsets defining the previous
    // word. If wordEnd <= wordStart, it's not a word for the purpose of hyphenation.
    ssize_t wordStart() const;

    ssize_t wordEnd() const;

    // Invoked by the destructor; may also be called directly.
    // NOTE(review): presumably closes mUText and drops the reference to the text - confirm
    // in the implementation file.
    void finish();

private:
    std::unique_ptr<icu::BreakIterator> mBreakIterator;
    UText mUText = UTEXT_INITIALIZER;
    const uint16_t* mText = nullptr;
    size_t mTextSize = 0;
    ssize_t mLast = 0;
    // Starts at 0 to match the documented "equal to 0 at BOT" contract of current().
    ssize_t mCurrent = 0;
    bool mIteratorWasReset = false;

    // state for the email address / url detector
    ssize_t mScanOffset = 0;
    bool mSuppressHyphen = false;
};

}  // namespace

#endif  // MINIKIN_WORD_BREAKER_H