aboutsummaryrefslogtreecommitdiffstats
path: root/gcc-4.8.3/gcc/ada/a-suezst.adb
diff options
context:
space:
mode:
Diffstat (limited to 'gcc-4.8.3/gcc/ada/a-suezst.adb')
-rw-r--r--gcc-4.8.3/gcc/ada/a-suezst.adb429
1 files changed, 429 insertions, 0 deletions
diff --git a/gcc-4.8.3/gcc/ada/a-suezst.adb b/gcc-4.8.3/gcc/ada/a-suezst.adb
new file mode 100644
index 000000000..81d0f670f
--- /dev/null
+++ b/gcc-4.8.3/gcc/ada/a-suezst.adb
@@ -0,0 +1,429 @@
+------------------------------------------------------------------------------
+-- --
+-- GNAT RUN-TIME COMPONENTS --
+-- --
+-- ADA.STRINGS.UTF_ENCODING.WIDE_WIDE_STRINGS --
+-- --
+-- B o d y --
+-- --
+-- Copyright (C) 2010-2012, Free Software Foundation, Inc. --
+-- --
+-- GNAT is free software; you can redistribute it and/or modify it under --
+-- terms of the GNU General Public License as published by the Free Soft- --
+-- ware Foundation; either version 3, or (at your option) any later ver- --
+-- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
+-- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
+-- or FITNESS FOR A PARTICULAR PURPOSE. --
+-- --
+-- As a special exception under Section 7 of GPL version 3, you are granted --
+-- additional permissions described in the GCC Runtime Library Exception, --
+-- version 3.1, as published by the Free Software Foundation. --
+-- --
+-- You should have received a copy of the GNU General Public License and --
+-- a copy of the GCC Runtime Library Exception along with this program; --
+-- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see --
+-- <http://www.gnu.org/licenses/>. --
+-- --
+-- GNAT was originally developed by the GNAT team at New York University. --
+-- Extensive contributions were provided by Ada Core Technologies Inc. --
+-- --
+------------------------------------------------------------------------------
+
+package body Ada.Strings.UTF_Encoding.Wide_Wide_Strings is
+ use Interfaces;
+
+ ------------
+ -- Decode --
+ ------------
+
+ -- Decode UTF-8/UTF-16BE/UTF-16LE input to Wide_Wide_String
+
+ function Decode
+ (Item : UTF_String;
+ Input_Scheme : Encoding_Scheme) return Wide_Wide_String
+ is
+ begin
+ if Input_Scheme = UTF_8 then
+ return Decode (Item);
+ else
+ return Decode (To_UTF_16 (Item, Input_Scheme));
+ end if;
+ end Decode;
+
+ -- Decode UTF-8 input to Wide_Wide_String
+
+ function Decode (Item : UTF_8_String) return Wide_Wide_String is
+ Result : Wide_Wide_String (1 .. Item'Length);
+ -- Result string (worst case is same length as input)
+
+ Len : Natural := 0;
+ -- Length of result stored so far
+
+ Iptr : Natural;
+ -- Input string pointer
+
+ C : Unsigned_8;
+ R : Unsigned_32;
+
+ procedure Get_Continuation;
+ -- Reads a continuation byte of the form 10xxxxxx, shifts R left by 6
+ -- bits, and or's in the xxxxxx to the low order 6 bits. On return Ptr
+ -- is incremented. Raises exception if continuation byte does not exist
+ -- or is invalid.
+
+ ----------------------
+ -- Get_Continuation --
+ ----------------------
+
+ procedure Get_Continuation is
+ begin
+ if Iptr > Item'Last then
+ Raise_Encoding_Error (Iptr - 1);
+
+ else
+ C := To_Unsigned_8 (Item (Iptr));
+ Iptr := Iptr + 1;
+
+ if C not in 2#10_000000# .. 2#10_111111# then
+ Raise_Encoding_Error (Iptr - 1);
+ else
+ R := Shift_Left (R, 6) or Unsigned_32 (C and 2#00_111111#);
+ end if;
+ end if;
+ end Get_Continuation;
+
+ -- Start of processing for Decode
+
+ begin
+ Iptr := Item'First;
+
+ -- Skip BOM at start
+
+ if Item'Length >= 3
+ and then Item (Iptr .. Iptr + 2) = BOM_8
+ then
+ Iptr := Iptr + 3;
+
+ -- Error if bad BOM
+
+ elsif Item'Length >= 2
+ and then (Item (Iptr .. Iptr + 1) = BOM_16BE
+ or else
+ Item (Iptr .. Iptr + 1) = BOM_16LE)
+ then
+ Raise_Encoding_Error (Iptr);
+ end if;
+
+ -- Loop through input characters
+
+ while Iptr <= Item'Last loop
+ C := To_Unsigned_8 (Item (Iptr));
+ Iptr := Iptr + 1;
+
+ -- Codes in the range 16#00# - 16#7F# are represented as
+ -- 0xxxxxxx
+
+ if C <= 16#7F# then
+ R := Unsigned_32 (C);
+
+ -- No initial code can be of the form 10xxxxxx. Such codes are used
+ -- only for continuations.
+
+ elsif C <= 2#10_111111# then
+ Raise_Encoding_Error (Iptr - 1);
+
+ -- Codes in the range 16#80# - 16#7FF# are represented as
+ -- 110yyyxx 10xxxxxx
+
+ elsif C <= 2#110_11111# then
+ R := Unsigned_32 (C and 2#000_11111#);
+ Get_Continuation;
+
+ -- Codes in the range 16#800# - 16#FFFF# are represented as
+ -- 1110yyyy 10yyyyxx 10xxxxxx
+
+ elsif C <= 2#1110_1111# then
+ R := Unsigned_32 (C and 2#0000_1111#);
+ Get_Continuation;
+ Get_Continuation;
+
+ -- Codes in the range 16#10000# - 16#10FFFF# are represented as
+ -- 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
+
+ elsif C <= 2#11110_111# then
+ R := Unsigned_32 (C and 2#00000_111#);
+ Get_Continuation;
+ Get_Continuation;
+ Get_Continuation;
+
+ -- Any other code is an error
+
+ else
+ Raise_Encoding_Error (Iptr - 1);
+ end if;
+
+ Len := Len + 1;
+ Result (Len) := Wide_Wide_Character'Val (R);
+ end loop;
+
+ return Result (1 .. Len);
+ end Decode;
+
+ -- Decode UTF-16 input to Wide_Wide_String
+
+ function Decode (Item : UTF_16_Wide_String) return Wide_Wide_String is
+ Result : Wide_Wide_String (1 .. Item'Length);
+ -- Result cannot be longer than the input string
+
+ Len : Natural := 0;
+ -- Length of result
+
+ Iptr : Natural;
+ -- Pointer to next element in Item
+
+ C : Unsigned_16;
+ R : Unsigned_32;
+
+ begin
+ -- Skip UTF-16 BOM at start
+
+ Iptr := Item'First;
+
+ if Iptr <= Item'Last and then Item (Iptr) = BOM_16 (1) then
+ Iptr := Iptr + 1;
+ end if;
+
+ -- Loop through input characters
+
+ while Iptr <= Item'Last loop
+ C := To_Unsigned_16 (Item (Iptr));
+ Iptr := Iptr + 1;
+
+ -- Codes in the range 16#0000#..16#D7FF# or 16#E000#..16#FFFD#
+ -- represent their own value.
+
+ if C <= 16#D7FF# or else C in 16#E000# .. 16#FFFD# then
+ Len := Len + 1;
+ Result (Len) := Wide_Wide_Character'Val (C);
+
+ -- Codes in the range 16#D800#..16#DBFF# represent the first of the
+ -- two surrogates used to encode the range 16#01_000#..16#10_FFFF".
+ -- The first surrogate provides 10 high order bits of the result.
+
+ elsif C <= 16#DBFF# then
+ R := Shift_Left ((Unsigned_32 (C) - 16#D800#), 10);
+
+ -- Error if at end of string
+
+ if Iptr > Item'Last then
+ Raise_Encoding_Error (Iptr - 1);
+
+ -- Otherwise next character must be valid low order surrogate
+ -- which provides the low 10 order bits of the result.
+
+ else
+ C := To_Unsigned_16 (Item (Iptr));
+ Iptr := Iptr + 1;
+
+ if C not in 16#DC00# .. 16#DFFF# then
+ Raise_Encoding_Error (Iptr - 1);
+
+ else
+ R := R or (Unsigned_32 (C) mod 2 ** 10);
+
+ -- The final adjustment is to add 16#01_0000 to get the
+ -- result back in the required 21 bit range.
+
+ R := R + 16#01_0000#;
+ Len := Len + 1;
+ Result (Len) := Wide_Wide_Character'Val (R);
+ end if;
+ end if;
+
+ -- Remaining codes are invalid
+
+ else
+ Raise_Encoding_Error (Iptr - 1);
+ end if;
+ end loop;
+
+ return Result (1 .. Len);
+ end Decode;
+
+ ------------
+ -- Encode --
+ ------------
+
+ -- Encode Wide_Wide_String in UTF-8, UTF-16BE or UTF-16LE
+
+ function Encode
+ (Item : Wide_Wide_String;
+ Output_Scheme : Encoding_Scheme;
+ Output_BOM : Boolean := False) return UTF_String
+ is
+ begin
+ if Output_Scheme = UTF_8 then
+ return Encode (Item, Output_BOM);
+ else
+ return From_UTF_16 (Encode (Item), Output_Scheme, Output_BOM);
+ end if;
+ end Encode;
+
+ -- Encode Wide_Wide_String in UTF-8
+
+ function Encode
+ (Item : Wide_Wide_String;
+ Output_BOM : Boolean := False) return UTF_8_String
+ is
+ Result : String (1 .. 4 * Item'Length + 3);
+ -- Worst case is four bytes per input byte + space for BOM
+
+ Len : Natural;
+ -- Number of output codes stored in Result
+
+ C : Unsigned_32;
+ -- Single input character
+
+ procedure Store (C : Unsigned_32);
+ pragma Inline (Store);
+ -- Store one output code (input is in range 0 .. 255)
+
+ -----------
+ -- Store --
+ -----------
+
+ procedure Store (C : Unsigned_32) is
+ begin
+ Len := Len + 1;
+ Result (Len) := Character'Val (C);
+ end Store;
+
+ -- Start of processing for Encode
+
+ begin
+ -- Output BOM if required
+
+ if Output_BOM then
+ Result (1 .. 3) := BOM_8;
+ Len := 3;
+ else
+ Len := 0;
+ end if;
+
+ -- Loop through characters of input
+
+ for Iptr in Item'Range loop
+ C := To_Unsigned_32 (Item (Iptr));
+
+ -- Codes in the range 16#00#..16#7F# are represented as
+ -- 0xxxxxxx
+
+ if C <= 16#7F# then
+ Store (C);
+
+ -- Codes in the range 16#80#..16#7FF# are represented as
+ -- 110yyyxx 10xxxxxx
+
+ elsif C <= 16#7FF# then
+ Store (2#110_00000# or Shift_Right (C, 6));
+ Store (2#10_000000# or (C and 2#00_111111#));
+
+ -- Codes in the range 16#800#..16#D7FF# or 16#E000#..16#FFFD# are
+ -- represented as
+ -- 1110yyyy 10yyyyxx 10xxxxxx
+
+ elsif C <= 16#D7FF# or else C in 16#E000# .. 16#FFFD# then
+ Store (2#1110_0000# or Shift_Right (C, 12));
+ Store (2#10_000000# or
+ Shift_Right (C and 2#111111_000000#, 6));
+ Store (2#10_000000# or (C and 2#00_111111#));
+
+ -- Codes in the range 16#10000# - 16#10FFFF# are represented as
+ -- 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
+
+ elsif C in 16#1_0000# .. 16#10_FFFF# then
+ Store (2#11110_000# or
+ Shift_Right (C, 18));
+ Store (2#10_000000# or
+ Shift_Right (C and 2#111111_000000_000000#, 12));
+ Store (2#10_000000# or
+ Shift_Right (C and 2#111111_000000#, 6));
+ Store (2#10_000000# or
+ (C and 2#00_111111#));
+
+ -- All other codes are invalid
+
+ else
+ Raise_Encoding_Error (Iptr);
+ end if;
+ end loop;
+
+ return Result (1 .. Len);
+ end Encode;
+
+ -- Encode Wide_Wide_String in UTF-16
+
+ function Encode
+ (Item : Wide_Wide_String;
+ Output_BOM : Boolean := False) return UTF_16_Wide_String
+ is
+ Result : UTF_16_Wide_String (1 .. 2 * Item'Length + 1);
+ -- Worst case is each input character generates two output characters
+ -- plus one for possible BOM.
+
+ Len : Integer;
+ -- Length of output string
+
+ C : Unsigned_32;
+
+ begin
+ -- Output BOM if needed
+
+ if Output_BOM then
+ Result (1) := BOM_16 (1);
+ Len := 1;
+ else
+ Len := 0;
+ end if;
+
+ -- Loop through input characters encoding them
+
+ for Iptr in Item'Range loop
+ C := To_Unsigned_32 (Item (Iptr));
+
+ -- Codes in the range 16#00_0000#..16#00_D7FF# or 16#E000#..16#FFFD#
+ -- are output unchanged
+
+ if C <= 16#00_D7FF# or else C in 16#E000# .. 16#FFFD# then
+ Len := Len + 1;
+ Result (Len) := Wide_Character'Val (C);
+
+ -- Codes in the range 16#01_0000#..16#10_FFFF# are output using two
+ -- surrogate characters. First 16#1_0000# is subtracted from the code
+ -- point to give a 20-bit value. This is then split into two separate
+ -- 10-bit values each of which is represented as a surrogate with the
+ -- most significant half placed in the first surrogate. The ranges of
+ -- values used for the two surrogates are 16#D800#-16#DBFF# for the
+ -- first, most significant surrogate and 16#DC00#-16#DFFF# for the
+ -- second, least significant surrogate.
+
+ elsif C in 16#1_0000# .. 16#10_FFFF# then
+ C := C - 16#1_0000#;
+
+ Len := Len + 1;
+ Result (Len) := Wide_Character'Val (16#D800# + C / 2 ** 10);
+
+ Len := Len + 1;
+ Result (Len) := Wide_Character'Val (16#DC00# + C mod 2 ** 10);
+
+ -- All other codes are invalid
+
+ else
+ Raise_Encoding_Error (Iptr);
+ end if;
+ end loop;
+
+ return Result (1 .. Len);
+ end Encode;
+
+end Ada.Strings.UTF_Encoding.Wide_Wide_Strings;