summaryrefslogtreecommitdiffstats
path: root/java/gov/nist/core/HostNameParser.java
blob: 5feddd1178ab920083cb69416a9970438f163f1d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
/*
* Conditions Of Use
*
* This software was developed by employees of the National Institute of
* Standards and Technology (NIST), an agency of the Federal Government.
* Pursuant to title 15 United States Code Section 105, works of NIST
* employees are not subject to copyright protection in the United States
* and are considered to be in the public domain.  As a result, a formal
* license is not needed to use the software.
*
* This software is provided by NIST as a service and is expressly
* provided "AS IS."  NIST MAKES NO WARRANTY OF ANY KIND, EXPRESS, IMPLIED
* OR STATUTORY, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTY OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT
* AND DATA ACCURACY.  NIST does not warrant or make any representations
* regarding the use of the software or the results thereof, including but
* not limited to the correctness, accuracy, reliability or usefulness of
* the software.
*
* Permission to use this software is contingent upon your acceptance
* of the terms of this agreement
*
* .
*
*/
/*
 *
 * IPv6 Support added by Emil Ivov (emil_ivov@yahoo.com)<br/>
 * Network Research Team (http://www-r2.u-strasbg.fr))<br/>
 * Louis Pasteur University - Strasbourg - France<br/>
 *
 *Bug fixes for corner cases were contributed by Thomas Froment.
 */
package gov.nist.core;

// BEGIN android-deleted
//import gov.nist.javax.sdp.parser.Lexer;
// END android-deleted

import java.text.ParseException;

/**
 * Parser for host names.
 *
 *@version 1.2
 *
 *@author M. Ranganathan
 */

public class HostNameParser extends ParserCore {
// BEGIN android-added
    private static LexerCore Lexer;
// END android-added

    /**
     * Determines whether or not we should tolerate and strip address scope
     * zones from IPv6 addresses. Address scope zones are sometimes returned
     * at the end of IPv6 addresses generated by InetAddress.getHostAddress().
     * They are however not part of the SIP semantics so basically this method
     * determines whether or not the parser should be stripping them (as
     * opposed simply being blunt and throwing an exception).
     */
    private boolean stripAddressScopeZones = false;

    public HostNameParser(String hname) {
        this.lexer = new LexerCore("charLexer", hname);

        stripAddressScopeZones
            = Boolean.getBoolean("gov.nist.core.STRIP_ADDR_SCOPES");
    }

    /**
     * The lexer is initialized with the buffer.
     */
    public HostNameParser(LexerCore lexer) {
        this.lexer = lexer;
        lexer.selectLexer("charLexer");

        stripAddressScopeZones
            = Boolean.getBoolean("gov.nist.core.STRIP_ADDR_SCOPES");
    }

    private static final char[] VALID_DOMAIN_LABEL_CHAR =
        new char[] {LexerCore.ALPHADIGIT_VALID_CHARS, '-', '.'};
    protected void consumeDomainLabel() throws ParseException {
        if (debug)
            dbg_enter("domainLabel");
        try {
            lexer.consumeValidChars(VALID_DOMAIN_LABEL_CHAR);
        } finally {
            if (debug)
                dbg_leave("domainLabel");
        }
    }

    protected String ipv6Reference() throws ParseException {
        StringBuffer retval = new StringBuffer();
        if (debug)
            dbg_enter("ipv6Reference");

        try {

            if(stripAddressScopeZones){
                while (lexer.hasMoreChars()) {
                    char la = lexer.lookAhead(0);
                    //'%' is ipv6 address scope zone. see detail at
                    //java.sun.com/j2se/1.5.0/docs/api/java/net/Inet6Address.html
                    if (LexerCore.isHexDigit(la) || la == '.' || la == ':'
                            || la == '[' ) {
                        lexer.consume(1);
                        retval.append(la);
                    } else if (la == ']') {
                        lexer.consume(1);
                        retval.append(la);
                        return retval.toString();
                    } else if (la == '%'){
                        //we need to strip the address scope zone.
                        lexer.consume(1);

                        String rest = lexer.getRest();

                        if(rest == null || rest.length() == 0){
                            //head for the parse exception
                            break;
                        }

                        //we strip everything until either the end of the string
                        //or a closing square bracket (])
                        int stripLen = rest.indexOf(']');

                        if (stripLen == -1){
                            //no square bracket -> not a valid ipv6 reference
                            break;
                        }

                        lexer.consume(stripLen+1);
                        retval.append("]");
                        return retval.toString();

                    } else
                        break;
                }
            }
            else
            {
                while (lexer.hasMoreChars())
                {
                    char la = lexer.lookAhead(0);
                    if (LexerCore.isHexDigit(la) || la == '.'
                            || la == ':' || la == '[') {
                        lexer.consume(1);
                        retval.append(la);
                    } else if (la == ']') {
                        lexer.consume(1);
                        retval.append(la);
                        return retval.toString();
                    } else
                    break;
                }
            }

            throw new ParseException(
                lexer.getBuffer() + ": Illegal Host name ",
                lexer.getPtr());
        } finally {
            if (debug)
                dbg_leave("ipv6Reference");
        }
    }

    public Host host() throws ParseException {
        if (debug)
            dbg_enter("host");
        try {
            String hostname;

            //IPv6 referene
            if (lexer.lookAhead(0) == '[') {
                hostname = ipv6Reference();
            }
            //IPv6 address (i.e. missing square brackets)
            else if( isIPv6Address(lexer.getRest()) )
            {
                int startPtr = lexer.getPtr();
                lexer.consumeValidChars(
                        new char[] {LexerCore.ALPHADIGIT_VALID_CHARS, ':'});
                hostname
                    = new StringBuffer("[").append(
                        lexer.getBuffer().substring(startPtr, lexer.getPtr()))
                        .append("]").toString();
            }
            //IPv4 address or hostname
            else {
                int startPtr = lexer.getPtr();
                consumeDomainLabel();
                hostname = lexer.getBuffer().substring(startPtr, lexer.getPtr());
            }

            if (hostname.length() == 0)
                throw new ParseException(
                    lexer.getBuffer() + ": Missing host name",
                    lexer.getPtr());
            else
                return new Host(hostname);
        } finally {
            if (debug)
                dbg_leave("host");
        }
    }

    /**
     * Tries to determine whether the address in <tt>uriHeader</tt> could be
     * an IPv6 address by counting the number of colons that appear in it.
     *
     * @param uriHeader the string (supposedly the value of a URI header) that
     * we have received for parsing.
     *
     * @return true if the host part of <tt>uriHeader</tt> could be an IPv6
     * address (i.e. contains at least two colons) and false otherwise.
     */
    private boolean isIPv6Address(String uriHeader)
    {
        // approximately detect the end the host part.
        //first check if we have an uri param
        int hostEnd = uriHeader.indexOf(Lexer.QUESTION);

        //if not or if it appears after a semi-colon then the end of the
        //address would be a header param.
        int semiColonIndex = uriHeader.indexOf(Lexer.SEMICOLON);
        if ( hostEnd == -1
            || (semiColonIndex!= -1 && hostEnd > semiColonIndex) )
            hostEnd = semiColonIndex;

        //if there was no header param either the address
        //continues until the end of the string
        if ( hostEnd == -1 )
            hostEnd = uriHeader.length();

        //extract the address
        String host = uriHeader.substring(0, hostEnd);

        int firstColonIndex = host.indexOf(Lexer.COLON);

        if(firstColonIndex == -1)
            return false;

        int secondColonIndex = host.indexOf(Lexer.COLON, firstColonIndex + 1);

        if(secondColonIndex == -1)
            return false;

        return true;
    }
    /**
     * Parses a host:port string
     *
     * @param allowWS - whether whitespace is allowed around ':', only true for Via headers
     * @return
     * @throws ParseException
     */
    public HostPort hostPort( boolean allowWS ) throws ParseException {
        if (debug)
            dbg_enter("hostPort");
        try {
            Host host = this.host();
            HostPort hp = new HostPort();
            hp.setHost(host);
            // Has a port?
            if (allowWS) lexer.SPorHT(); // white space before ":port" should be accepted
            if (lexer.hasMoreChars()) {
                char la = lexer.lookAhead(0);
                switch (la)
                {
                case ':':
                    lexer.consume(1);
                    if (allowWS) lexer.SPorHT(); // white space before port number should be accepted
                    try {
                        String port = lexer.number();
                        hp.setPort(Integer.parseInt(port));
                    } catch (NumberFormatException nfe) {
                        throw new ParseException(
                            lexer.getBuffer() + " :Error parsing port ",
                            lexer.getPtr());
                    }
                    break;

                case ',':	// allowed in case of multi-headers, e.g. Route
                			// Could check that current header is a multi hdr
                    
                case ';':   // OK, can appear in URIs (parameters)
                case '?':   // same, header parameters
                case '>':   // OK, can appear in headers
                case ' ':   // OK, allow whitespace
                case '\t':
                case '\r':
                case '\n':
                case '/':   // e.g. http://[::1]/xyz.html
                    break;
                case '%':
                    if(stripAddressScopeZones){
                        break;//OK,allow IPv6 address scope zone
                    }
                    
                default:
                    if (!allowWS) {
                        throw new ParseException( lexer.getBuffer() +
                                " Illegal character in hostname:" + lexer.lookAhead(0),
                                lexer.getPtr() );
                    }
                }
            }
            return hp;
        } finally {
            if (debug)
                dbg_leave("hostPort");
        }
    }

    public static void main(String args[]) throws ParseException {
        String hostNames[] =
            {
                "foo.bar.com:1234",
                "proxima.chaplin.bt.co.uk",
                "129.6.55.181:2345",
                ":1234",
                "foo.bar.com:         1234",
                "foo.bar.com     :      1234   ",
                "MIK_S:1234"
            };

        for (int i = 0; i < hostNames.length; i++) {
            try {
                HostNameParser hnp = new HostNameParser(hostNames[i]);
                HostPort hp = hnp.hostPort(true);
                System.out.println("["+hp.encode()+"]");
            } catch (ParseException ex) {
                System.out.println("exception text = " + ex.getMessage());
            }
        }

    }
}