From 40d7cd0fd78fe2004e2a53c4618c148339b02733 Mon Sep 17 00:00:00 2001 From: Jing Yu Date: Mon, 19 Dec 2011 16:56:54 -0800 Subject: Add gcc-4.6. Synced to @180989 Change-Id: Ie3676586e1d8e3c8cd9f07d022f450d05fa08439 svn://gcc.gnu.org/svn/gcc/branches/google/gcc-4_6-mobile --- gcc-4.6/gcc/ada/a-suezst.adb | 429 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 429 insertions(+) create mode 100755 gcc-4.6/gcc/ada/a-suezst.adb (limited to 'gcc-4.6/gcc/ada/a-suezst.adb') diff --git a/gcc-4.6/gcc/ada/a-suezst.adb b/gcc-4.6/gcc/ada/a-suezst.adb new file mode 100755 index 000000000..40266fb92 --- /dev/null +++ b/gcc-4.6/gcc/ada/a-suezst.adb @@ -0,0 +1,429 @@ +------------------------------------------------------------------------------ +-- -- +-- GNAT RUN-TIME COMPONENTS -- +-- -- +-- ADA.STRINGS.UTF_ENCODING.WIDE_WIDE_STRINGS -- +-- -- +-- B o d y -- +-- -- +-- Copyright (C) 2010, Free Software Foundation, Inc. -- +-- -- +-- GNAT is free software; you can redistribute it and/or modify it under -- +-- terms of the GNU General Public License as published by the Free Soft- -- +-- ware Foundation; either version 3, or (at your option) any later ver- -- +-- sion. GNAT is distributed in the hope that it will be useful, but WITH- -- +-- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -- +-- or FITNESS FOR A PARTICULAR PURPOSE. -- +-- -- +-- As a special exception under Section 7 of GPL version 3, you are granted -- +-- additional permissions described in the GCC Runtime Library Exception, -- +-- version 3.1, as published by the Free Software Foundation. -- +-- -- +-- You should have received a copy of the GNU General Public License and -- +-- a copy of the GCC Runtime Library Exception along with this program; -- +-- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -- +-- . -- +-- -- +-- GNAT was originally developed by the GNAT team at New York University. -- +-- Extensive contributions were provided by Ada Core Technologies Inc. -- +-- -- +------------------------------------------------------------------------------ + +package body Ada.Strings.UTF_Encoding.Wide_Wide_Strings is + use Interfaces; + + ------------ + -- Decode -- + ------------ + + -- Decode UTF-8/UTF-16BE/UTF-16LE input to Wide_Wide_String + + function Decode + (Item : UTF_String; + Input_Scheme : Encoding_Scheme) return Wide_Wide_String + is + begin + if Input_Scheme = UTF_8 then + return Decode (Item); + else + return Decode (To_UTF_16 (Item, Input_Scheme)); + end if; + end Decode; + + -- Decode UTF-8 input to Wide_Wide_String + + function Decode (Item : UTF_8_String) return Wide_Wide_String is + Result : Wide_Wide_String (1 .. Item'Length); + -- Result string (worst case is same length as input) + + Len : Natural := 0; + -- Length of result stored so far + + Iptr : Natural; + -- Input string pointer + + C : Unsigned_8; + R : Unsigned_32; + + procedure Get_Continuation; + -- Reads a continuation byte of the form 10xxxxxx, shifts R left + -- by 6 bits, and or's in the xxxxxx to the low order 6 bits. On + -- return Ptr is incremented. Raises exception if continuation + -- byte does not exist or is invalid. + + ---------------------- + -- Get_Continuation -- + ---------------------- + + procedure Get_Continuation is + begin + if Iptr > Item'Last then + Raise_Encoding_Error (Iptr - 1); + + else + C := To_Unsigned_8 (Item (Iptr)); + Iptr := Iptr + 1; + + if C not in 2#10_000000# .. 2#10_111111# then + Raise_Encoding_Error (Iptr - 1); + else + R := Shift_Left (R, 6) or Unsigned_32 (C and 2#00_111111#); + end if; + end if; + end Get_Continuation; + + -- Start of processing for Decode + + begin + Iptr := Item'First; + + -- Skip BOM at start + + if Item'Length >= 3 + and then Item (Iptr .. Iptr + 2) = BOM_8 + then + Iptr := Iptr + 3; + + -- Error if bad BOM + + elsif Item'Length >= 2 + and then (Item (Iptr .. Iptr + 1) = BOM_16BE + or else + Item (Iptr .. Iptr + 1) = BOM_16LE) + then + Raise_Encoding_Error (Iptr); + end if; + + -- Loop through input characters + + while Iptr <= Item'Last loop + C := To_Unsigned_8 (Item (Iptr)); + Iptr := Iptr + 1; + + -- Codes in the range 16#00# - 16#7F# are represented as + -- 0xxxxxxx + + if C <= 16#7F# then + R := Unsigned_32 (C); + + -- No initial code can be of the form 10xxxxxx. Such codes are used + -- only for continuations. + + elsif C <= 2#10_111111# then + Raise_Encoding_Error (Iptr - 1); + + -- Codes in the range 16#80# - 16#7FF# are represented as + -- 110yyyxx 10xxxxxx + + elsif C <= 2#110_11111# then + R := Unsigned_32 (C and 2#000_11111#); + Get_Continuation; + + -- Codes in the range 16#800# - 16#FFFF# are represented as + -- 1110yyyy 10yyyyxx 10xxxxxx + + elsif C <= 2#1110_1111# then + R := Unsigned_32 (C and 2#0000_1111#); + Get_Continuation; + Get_Continuation; + + -- Codes in the range 16#10000# - 16#10FFFF# are represented as + -- 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx + + elsif C <= 2#11110_111# then + R := Unsigned_32 (C and 2#00000_111#); + Get_Continuation; + Get_Continuation; + Get_Continuation; + + -- Any other code is an error + + else + Raise_Encoding_Error (Iptr - 1); + end if; + + Len := Len + 1; + Result (Len) := Wide_Wide_Character'Val (R); + end loop; + + return Result (1 .. Len); + end Decode; + + -- Decode UTF-16 input to Wide_Wide_String + + function Decode (Item : UTF_16_Wide_String) return Wide_Wide_String is + Result : Wide_Wide_String (1 .. Item'Length); + -- Result cannot be longer than the input string + + Len : Natural := 0; + -- Length of result + + Iptr : Natural; + -- Pointer to next element in Item + + C : Unsigned_16; + R : Unsigned_32; + + begin + -- Skip UTF-16 BOM at start + + Iptr := Item'First; + + if Iptr <= Item'Last and then Item (Iptr) = BOM_16 (1) then + Iptr := Iptr + 1; + end if; + + -- Loop through input characters + + while Iptr <= Item'Last loop + C := To_Unsigned_16 (Item (Iptr)); + Iptr := Iptr + 1; + + -- Codes in the range 16#0000#..16#D7FF# or 16#E000#..16#FFFD# + -- represent their own value. + + if C <= 16#D7FF# or else C in 16#E000# .. 16#FFFD# then + Len := Len + 1; + Result (Len) := Wide_Wide_Character'Val (C); + + -- Codes in the range 16#D800#..16#DBFF# represent the first of the + -- two surrogates used to encode the range 16#01_000#..16#10_FFFF". + -- The first surrogate provides 10 high order bits of the result. + + elsif C <= 16#DBFF# then + R := Shift_Left ((Unsigned_32 (C) - 16#D800#), 10); + + -- Error if at end of string + + if Iptr > Item'Last then + Raise_Encoding_Error (Iptr - 1); + + -- Otherwise next character must be valid low order surrogate + -- which provides the low 10 order bits of the result. + + else + C := To_Unsigned_16 (Item (Iptr)); + Iptr := Iptr + 1; + + if C not in 16#DC00# .. 16#DFFF# then + Raise_Encoding_Error (Iptr - 1); + + else + R := R or (Unsigned_32 (C) mod 2 ** 10); + + -- The final adjustment is to add 16#01_0000 to get the + -- result back in the required 21 bit range. + + R := R + 16#01_0000#; + Len := Len + 1; + Result (Len) := Wide_Wide_Character'Val (R); + end if; + end if; + + -- Remaining codes are invalid + + else + Raise_Encoding_Error (Iptr - 1); + end if; + end loop; + + return Result (1 .. Len); + end Decode; + + ------------ + -- Encode -- + ------------ + + -- Encode Wide_Wide_String in UTF-8, UTF-16BE or UTF-16LE + + function Encode + (Item : Wide_Wide_String; + Output_Scheme : Encoding_Scheme; + Output_BOM : Boolean := False) return UTF_String + is + begin + if Output_Scheme = UTF_8 then + return Encode (Item, Output_BOM); + else + return From_UTF_16 (Encode (Item), Output_Scheme, Output_BOM); + end if; + end Encode; + + -- Encode Wide_Wide_String in UTF-8 + + function Encode + (Item : Wide_Wide_String; + Output_BOM : Boolean := False) return UTF_8_String + is + Result : String (1 .. 4 * Item'Length + 3); + -- Worst case is four bytes per input byte + space for BOM + + Len : Natural; + -- Number of output codes stored in Result + + C : Unsigned_32; + -- Single input character + + procedure Store (C : Unsigned_32); + pragma Inline (Store); + -- Store one output code (input is in range 0 .. 255) + + ----------- + -- Store -- + ----------- + + procedure Store (C : Unsigned_32) is + begin + Len := Len + 1; + Result (Len) := Character'Val (C); + end Store; + + -- Start of processing for Encode + + begin + -- Output BOM if required + + if Output_BOM then + Result (1 .. 3) := BOM_8; + Len := 3; + else + Len := 0; + end if; + + -- Loop through characters of input + + for Iptr in Item'Range loop + C := To_Unsigned_32 (Item (Iptr)); + + -- Codes in the range 16#00#..16#7F# are represented as + -- 0xxxxxxx + + if C <= 16#7F# then + Store (C); + + -- Codes in the range 16#80#..16#7FF# are represented as + -- 110yyyxx 10xxxxxx + + elsif C <= 16#7FF# then + Store (2#110_00000# or Shift_Right (C, 6)); + Store (2#10_000000# or (C and 2#00_111111#)); + + -- Codes in the range 16#800#..16#D7FF# or 16#E000#..16#FFFD# are + -- represented as + -- 1110yyyy 10yyyyxx 10xxxxxx + + elsif C <= 16#D7FF# or else C in 16#E000# .. 16#FFFD# then + Store (2#1110_0000# or Shift_Right (C, 12)); + Store (2#10_000000# or + Shift_Right (C and 2#111111_000000#, 6)); + Store (2#10_000000# or (C and 2#00_111111#)); + + -- Codes in the range 16#10000# - 16#10FFFF# are represented as + -- 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx + + elsif C in 16#1_0000# .. 16#10_FFFF# then + Store (2#11110_000# or + Shift_Right (C, 18)); + Store (2#10_000000# or + Shift_Right (C and 2#111111_000000_000000#, 12)); + Store (2#10_000000# or + Shift_Right (C and 2#111111_000000#, 6)); + Store (2#10_000000# or + (C and 2#00_111111#)); + + -- All other codes are invalid + + else + Raise_Encoding_Error (Iptr); + end if; + end loop; + + return Result (1 .. Len); + end Encode; + + -- Encode Wide_Wide_String in UTF-16 + + function Encode + (Item : Wide_Wide_String; + Output_BOM : Boolean := False) return UTF_16_Wide_String + is + Result : UTF_16_Wide_String (1 .. 2 * Item'Length + 1); + -- Worst case is each input character generates two output characters + -- plus one for possible BOM. + + Len : Integer; + -- Length of output string + + C : Unsigned_32; + + begin + -- Output BOM if needed + + if Output_BOM then + Result (1) := BOM_16 (1); + Len := 1; + else + Len := 0; + end if; + + -- Loop through input characters encoding them + + for Iptr in Item'Range loop + C := To_Unsigned_32 (Item (Iptr)); + + -- Codes in the range 16#00_0000#..16#00_D7FF# or 16#E000#..16#FFFD# + -- are output unchanged + + if C <= 16#00_D7FF# or else C in 16#E000# .. 16#FFFD# then + Len := Len + 1; + Result (Len) := Wide_Character'Val (C); + + -- Codes in the range 16#01_0000#..16#10_FFFF# are output using two + -- surrogate characters. First 16#1_0000# is subtracted from the code + -- point to give a 20-bit value. This is then split into two separate + -- 10-bit values each of which is represented as a surrogate with the + -- most significant half placed in the first surrogate. The ranges of + -- values used for the two surrogates are 16#D800#-16#DBFF# for the + -- first, most significant surrogate and 16#DC00#-16#DFFF# for the + -- second, least significant surrogate. + + elsif C in 16#1_0000# .. 16#10_FFFF# then + C := C - 16#1_0000#; + + Len := Len + 1; + Result (Len) := Wide_Character'Val (16#D800# + C / 2 ** 10); + + Len := Len + 1; + Result (Len) := Wide_Character'Val (16#DC00# + C mod 2 ** 10); + + -- All other codes are invalid + + else + Raise_Encoding_Error (Iptr); + end if; + end loop; + + return Result (1 .. Len); + end Encode; + +end Ada.Strings.UTF_Encoding.Wide_Wide_Strings; -- cgit v1.2.3