797 lines
23 KiB
Ada
797 lines
23 KiB
Ada
|
------------------------------------------------------------------------------
|
||
|
-- --
|
||
|
-- GNAT RUN-TIME COMPONENTS --
|
||
|
-- --
|
||
|
-- G N A T . D E C O D E _ S T R I N G --
|
||
|
-- --
|
||
|
-- S p e c --
|
||
|
-- --
|
||
|
-- Copyright (C) 2007-2014, AdaCore --
|
||
|
-- --
|
||
|
-- GNAT is free software; you can redistribute it and/or modify it under --
|
||
|
-- terms of the GNU General Public License as published by the Free Soft- --
|
||
|
-- ware Foundation; either version 3, or (at your option) any later ver- --
|
||
|
-- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
|
||
|
-- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
|
||
|
-- or FITNESS FOR A PARTICULAR PURPOSE. --
|
||
|
-- --
|
||
|
-- As a special exception under Section 7 of GPL version 3, you are granted --
|
||
|
-- additional permissions described in the GCC Runtime Library Exception, --
|
||
|
-- version 3.1, as published by the Free Software Foundation. --
|
||
|
-- --
|
||
|
-- You should have received a copy of the GNU General Public License and --
|
||
|
-- a copy of the GCC Runtime Library Exception along with this program; --
|
||
|
-- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see --
|
||
|
-- <http://www.gnu.org/licenses/>. --
|
||
|
-- --
|
||
|
-- GNAT was originally developed by the GNAT team at New York University. --
|
||
|
-- Extensive contributions were provided by Ada Core Technologies Inc. --
|
||
|
-- --
|
||
|
------------------------------------------------------------------------------
|
||
|
|
||
|
-- This package provides a utility routine for converting from an encoded
|
||
|
-- string to a corresponding Wide_String or Wide_Wide_String value.
|
||
|
|
||
|
with Interfaces; use Interfaces;
|
||
|
|
||
|
with System.WCh_Cnv; use System.WCh_Cnv;
|
||
|
with System.WCh_Con; use System.WCh_Con;
|
||
|
|
||
|
package body GNAT.Decode_String is
|
||
|
|
||
|
-----------------------
|
||
|
-- Local Subprograms --
|
||
|
-----------------------
|
||
|
|
||
|
procedure Bad;
|
||
|
pragma No_Return (Bad);
|
||
|
-- Raise error for bad encoding
|
||
|
|
||
|
procedure Past_End;
|
||
|
pragma No_Return (Past_End);
|
||
|
-- Raise error for off end of string
|
||
|
|
||
|
---------
|
||
|
-- Bad --
|
||
|
---------
|
||
|
|
||
|
procedure Bad is
|
||
|
begin
|
||
|
raise Constraint_Error with
|
||
|
"bad encoding or character out of range";
|
||
|
end Bad;
|
||
|
|
||
|
---------------------------
|
||
|
-- Decode_Wide_Character --
|
||
|
---------------------------
|
||
|
|
||
|
procedure Decode_Wide_Character
|
||
|
(Input : String;
|
||
|
Ptr : in out Natural;
|
||
|
Result : out Wide_Character)
|
||
|
is
|
||
|
Char : Wide_Wide_Character;
|
||
|
begin
|
||
|
Decode_Wide_Wide_Character (Input, Ptr, Char);
|
||
|
|
||
|
if Wide_Wide_Character'Pos (Char) > 16#FFFF# then
|
||
|
Bad;
|
||
|
else
|
||
|
Result := Wide_Character'Val (Wide_Wide_Character'Pos (Char));
|
||
|
end if;
|
||
|
end Decode_Wide_Character;
|
||
|
|
||
|
------------------------
|
||
|
-- Decode_Wide_String --
|
||
|
------------------------
|
||
|
|
||
|
function Decode_Wide_String (S : String) return Wide_String is
|
||
|
Result : Wide_String (1 .. S'Length);
|
||
|
Length : Natural;
|
||
|
begin
|
||
|
Decode_Wide_String (S, Result, Length);
|
||
|
return Result (1 .. Length);
|
||
|
end Decode_Wide_String;
|
||
|
|
||
|
procedure Decode_Wide_String
|
||
|
(S : String;
|
||
|
Result : out Wide_String;
|
||
|
Length : out Natural)
|
||
|
is
|
||
|
Ptr : Natural;
|
||
|
|
||
|
begin
|
||
|
Ptr := S'First;
|
||
|
Length := 0;
|
||
|
while Ptr <= S'Last loop
|
||
|
if Length >= Result'Last then
|
||
|
Past_End;
|
||
|
end if;
|
||
|
|
||
|
Length := Length + 1;
|
||
|
Decode_Wide_Character (S, Ptr, Result (Length));
|
||
|
end loop;
|
||
|
end Decode_Wide_String;
|
||
|
|
||
|
--------------------------------
|
||
|
-- Decode_Wide_Wide_Character --
|
||
|
--------------------------------
|
||
|
|
||
|
procedure Decode_Wide_Wide_Character
|
||
|
(Input : String;
|
||
|
Ptr : in out Natural;
|
||
|
Result : out Wide_Wide_Character)
|
||
|
is
|
||
|
C : Character;
|
||
|
|
||
|
function In_Char return Character;
|
||
|
pragma Inline (In_Char);
|
||
|
-- Function to get one input character
|
||
|
|
||
|
-------------
|
||
|
-- In_Char --
|
||
|
-------------
|
||
|
|
||
|
function In_Char return Character is
|
||
|
begin
|
||
|
if Ptr <= Input'Last then
|
||
|
Ptr := Ptr + 1;
|
||
|
return Input (Ptr - 1);
|
||
|
else
|
||
|
Past_End;
|
||
|
end if;
|
||
|
end In_Char;
|
||
|
|
||
|
-- Start of processing for Decode_Wide_Wide_Character
|
||
|
|
||
|
begin
|
||
|
C := In_Char;
|
||
|
|
||
|
-- Special fast processing for UTF-8 case
|
||
|
|
||
|
if Encoding_Method = WCEM_UTF8 then
|
||
|
UTF8 : declare
|
||
|
U : Unsigned_32;
|
||
|
W : Unsigned_32;
|
||
|
|
||
|
procedure Get_UTF_Byte;
|
||
|
pragma Inline (Get_UTF_Byte);
|
||
|
-- Used to interpret 2#10xxxxxx# continuation byte in UTF-8 mode.
|
||
|
-- Reads a byte, and raises CE if the first two bits are not 10.
|
||
|
-- Otherwise shifts W 6 bits left and or's in the 6 xxxxxx bits.
|
||
|
|
||
|
------------------
|
||
|
-- Get_UTF_Byte --
|
||
|
------------------
|
||
|
|
||
|
procedure Get_UTF_Byte is
|
||
|
begin
|
||
|
U := Unsigned_32 (Character'Pos (In_Char));
|
||
|
|
||
|
if (U and 2#11000000#) /= 2#10_000000# then
|
||
|
Bad;
|
||
|
end if;
|
||
|
|
||
|
W := Shift_Left (W, 6) or (U and 2#00111111#);
|
||
|
end Get_UTF_Byte;
|
||
|
|
||
|
-- Start of processing for UTF8 case
|
||
|
|
||
|
begin
|
||
|
-- Note: for details of UTF8 encoding see RFC 3629
|
||
|
|
||
|
U := Unsigned_32 (Character'Pos (C));
|
||
|
|
||
|
-- 16#00_0000#-16#00_007F#: 0xxxxxxx
|
||
|
|
||
|
if (U and 2#10000000#) = 2#00000000# then
|
||
|
Result := Wide_Wide_Character'Val (Character'Pos (C));
|
||
|
|
||
|
-- 16#00_0080#-16#00_07FF#: 110xxxxx 10xxxxxx
|
||
|
|
||
|
elsif (U and 2#11100000#) = 2#110_00000# then
|
||
|
W := U and 2#00011111#;
|
||
|
Get_UTF_Byte;
|
||
|
|
||
|
if W not in 16#00_0080# .. 16#00_07FF# then
|
||
|
Bad;
|
||
|
end if;
|
||
|
|
||
|
Result := Wide_Wide_Character'Val (W);
|
||
|
|
||
|
-- 16#00_0800#-16#00_ffff#: 1110xxxx 10xxxxxx 10xxxxxx
|
||
|
|
||
|
elsif (U and 2#11110000#) = 2#1110_0000# then
|
||
|
W := U and 2#00001111#;
|
||
|
Get_UTF_Byte;
|
||
|
Get_UTF_Byte;
|
||
|
|
||
|
if W not in 16#00_0800# .. 16#00_FFFF# then
|
||
|
Bad;
|
||
|
end if;
|
||
|
|
||
|
Result := Wide_Wide_Character'Val (W);
|
||
|
|
||
|
-- 16#01_0000#-16#10_FFFF#: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
|
||
|
|
||
|
elsif (U and 2#11111000#) = 2#11110_000# then
|
||
|
W := U and 2#00000111#;
|
||
|
|
||
|
for K in 1 .. 3 loop
|
||
|
Get_UTF_Byte;
|
||
|
end loop;
|
||
|
|
||
|
if W not in 16#01_0000# .. 16#10_FFFF# then
|
||
|
Bad;
|
||
|
end if;
|
||
|
|
||
|
Result := Wide_Wide_Character'Val (W);
|
||
|
|
||
|
-- 16#0020_0000#-16#03FF_FFFF#: 111110xx 10xxxxxx 10xxxxxx
|
||
|
-- 10xxxxxx 10xxxxxx
|
||
|
|
||
|
elsif (U and 2#11111100#) = 2#111110_00# then
|
||
|
W := U and 2#00000011#;
|
||
|
|
||
|
for K in 1 .. 4 loop
|
||
|
Get_UTF_Byte;
|
||
|
end loop;
|
||
|
|
||
|
if W not in 16#0020_0000# .. 16#03FF_FFFF# then
|
||
|
Bad;
|
||
|
end if;
|
||
|
|
||
|
Result := Wide_Wide_Character'Val (W);
|
||
|
|
||
|
-- All other cases are invalid, note that this includes:
|
||
|
|
||
|
-- 16#0400_0000#-16#7FFF_FFFF#: 1111110x 10xxxxxx 10xxxxxx
|
||
|
-- 10xxxxxx 10xxxxxx 10xxxxxx
|
||
|
|
||
|
-- since Wide_Wide_Character does not include code values
|
||
|
-- greater than 16#03FF_FFFF#.
|
||
|
|
||
|
else
|
||
|
Bad;
|
||
|
end if;
|
||
|
end UTF8;
|
||
|
|
||
|
-- All encoding functions other than UTF-8
|
||
|
|
||
|
else
|
||
|
Non_UTF8 : declare
|
||
|
function Char_Sequence_To_UTF is
|
||
|
new Char_Sequence_To_UTF_32 (In_Char);
|
||
|
|
||
|
begin
|
||
|
-- For brackets, must test for specific case of [ not followed by
|
||
|
-- quotation, where we must not call Char_Sequence_To_UTF, but
|
||
|
-- instead just return the bracket unchanged.
|
||
|
|
||
|
if Encoding_Method = WCEM_Brackets
|
||
|
and then C = '['
|
||
|
and then (Ptr > Input'Last or else Input (Ptr) /= '"')
|
||
|
then
|
||
|
Result := '[';
|
||
|
|
||
|
-- All other cases including [" with Brackets
|
||
|
|
||
|
else
|
||
|
Result :=
|
||
|
Wide_Wide_Character'Val
|
||
|
(Char_Sequence_To_UTF (C, Encoding_Method));
|
||
|
end if;
|
||
|
end Non_UTF8;
|
||
|
end if;
|
||
|
end Decode_Wide_Wide_Character;
|
||
|
|
||
|
-----------------------------
|
||
|
-- Decode_Wide_Wide_String --
|
||
|
-----------------------------
|
||
|
|
||
|
function Decode_Wide_Wide_String (S : String) return Wide_Wide_String is
|
||
|
Result : Wide_Wide_String (1 .. S'Length);
|
||
|
Length : Natural;
|
||
|
begin
|
||
|
Decode_Wide_Wide_String (S, Result, Length);
|
||
|
return Result (1 .. Length);
|
||
|
end Decode_Wide_Wide_String;
|
||
|
|
||
|
procedure Decode_Wide_Wide_String
|
||
|
(S : String;
|
||
|
Result : out Wide_Wide_String;
|
||
|
Length : out Natural)
|
||
|
is
|
||
|
Ptr : Natural;
|
||
|
|
||
|
begin
|
||
|
Ptr := S'First;
|
||
|
Length := 0;
|
||
|
while Ptr <= S'Last loop
|
||
|
if Length >= Result'Last then
|
||
|
Past_End;
|
||
|
end if;
|
||
|
|
||
|
Length := Length + 1;
|
||
|
Decode_Wide_Wide_Character (S, Ptr, Result (Length));
|
||
|
end loop;
|
||
|
end Decode_Wide_Wide_String;
|
||
|
|
||
|
-------------------------
|
||
|
-- Next_Wide_Character --
|
||
|
-------------------------
|
||
|
|
||
|
procedure Next_Wide_Character (Input : String; Ptr : in out Natural) is
|
||
|
Discard : Wide_Character;
|
||
|
begin
|
||
|
Decode_Wide_Character (Input, Ptr, Discard);
|
||
|
end Next_Wide_Character;
|
||
|
|
||
|
------------------------------
|
||
|
-- Next_Wide_Wide_Character --
|
||
|
------------------------------
|
||
|
|
||
|
procedure Next_Wide_Wide_Character (Input : String; Ptr : in out Natural) is
|
||
|
Discard : Wide_Wide_Character;
|
||
|
begin
|
||
|
Decode_Wide_Wide_Character (Input, Ptr, Discard);
|
||
|
end Next_Wide_Wide_Character;
|
||
|
|
||
|
--------------
|
||
|
-- Past_End --
|
||
|
--------------
|
||
|
|
||
|
procedure Past_End is
|
||
|
begin
|
||
|
raise Constraint_Error with "past end of string";
|
||
|
end Past_End;
|
||
|
|
||
|
-------------------------
|
||
|
-- Prev_Wide_Character --
|
||
|
-------------------------
|
||
|
|
||
|
procedure Prev_Wide_Character (Input : String; Ptr : in out Natural) is
|
||
|
begin
|
||
|
if Ptr > Input'Last + 1 then
|
||
|
Past_End;
|
||
|
end if;
|
||
|
|
||
|
-- Special efficient encoding for UTF-8 case
|
||
|
|
||
|
if Encoding_Method = WCEM_UTF8 then
|
||
|
UTF8 : declare
|
||
|
U : Unsigned_32;
|
||
|
|
||
|
procedure Getc;
|
||
|
pragma Inline (Getc);
|
||
|
-- Gets the character at Input (Ptr - 1) and returns code in U as
|
||
|
-- Unsigned_32 value. On return Ptr is decremented by one.
|
||
|
|
||
|
procedure Skip_UTF_Byte;
|
||
|
pragma Inline (Skip_UTF_Byte);
|
||
|
-- Checks that U is 2#10xxxxxx# and then calls Get
|
||
|
|
||
|
----------
|
||
|
-- Getc --
|
||
|
----------
|
||
|
|
||
|
procedure Getc is
|
||
|
begin
|
||
|
if Ptr <= Input'First then
|
||
|
Past_End;
|
||
|
else
|
||
|
Ptr := Ptr - 1;
|
||
|
U := Unsigned_32 (Character'Pos (Input (Ptr)));
|
||
|
end if;
|
||
|
end Getc;
|
||
|
|
||
|
-------------------
|
||
|
-- Skip_UTF_Byte --
|
||
|
-------------------
|
||
|
|
||
|
procedure Skip_UTF_Byte is
|
||
|
begin
|
||
|
if (U and 2#11000000#) = 2#10_000000# then
|
||
|
Getc;
|
||
|
else
|
||
|
Bad;
|
||
|
end if;
|
||
|
end Skip_UTF_Byte;
|
||
|
|
||
|
-- Start of processing for UTF-8 case
|
||
|
|
||
|
begin
|
||
|
-- 16#00_0000#-16#00_007F#: 0xxxxxxx
|
||
|
|
||
|
Getc;
|
||
|
|
||
|
if (U and 2#10000000#) = 2#00000000# then
|
||
|
return;
|
||
|
|
||
|
-- 16#00_0080#-16#00_07FF#: 110xxxxx 10xxxxxx
|
||
|
|
||
|
else
|
||
|
Skip_UTF_Byte;
|
||
|
|
||
|
if (U and 2#11100000#) = 2#110_00000# then
|
||
|
return;
|
||
|
|
||
|
-- 16#00_0800#-16#00_ffff#: 1110xxxx 10xxxxxx 10xxxxxx
|
||
|
|
||
|
else
|
||
|
Skip_UTF_Byte;
|
||
|
|
||
|
if (U and 2#11110000#) = 2#1110_0000# then
|
||
|
return;
|
||
|
|
||
|
-- Any other code is invalid, note that this includes:
|
||
|
|
||
|
-- 16#01_0000#-16#10_FFFF#: 11110xxx 10xxxxxx 10xxxxxx
|
||
|
-- 10xxxxxx
|
||
|
|
||
|
-- 16#0020_0000#-16#03FF_FFFF#: 111110xx 10xxxxxx
|
||
|
-- 10xxxxxx 10xxxxxx
|
||
|
-- 10xxxxxx
|
||
|
|
||
|
-- 16#0400_0000#-16#7FFF_FFFF#: 1111110x 10xxxxxx
|
||
|
-- 10xxxxxx 10xxxxxx
|
||
|
-- 10xxxxxx 10xxxxxx
|
||
|
|
||
|
-- since Wide_Character does not allow codes > 16#FFFF#
|
||
|
|
||
|
else
|
||
|
Bad;
|
||
|
end if;
|
||
|
end if;
|
||
|
end if;
|
||
|
end UTF8;
|
||
|
|
||
|
-- Special efficient encoding for brackets case
|
||
|
|
||
|
elsif Encoding_Method = WCEM_Brackets then
|
||
|
Brackets : declare
|
||
|
P : Natural;
|
||
|
S : Natural;
|
||
|
|
||
|
begin
|
||
|
-- See if we have "] at end positions
|
||
|
|
||
|
if Ptr > Input'First + 1
|
||
|
and then Input (Ptr - 1) = ']'
|
||
|
and then Input (Ptr - 2) = '"'
|
||
|
then
|
||
|
P := Ptr - 2;
|
||
|
|
||
|
-- Loop back looking for [" at start
|
||
|
|
||
|
while P >= Ptr - 10 loop
|
||
|
if P <= Input'First + 1 then
|
||
|
Bad;
|
||
|
|
||
|
elsif Input (P - 1) = '"'
|
||
|
and then Input (P - 2) = '['
|
||
|
then
|
||
|
-- Found ["..."], scan forward to check it
|
||
|
|
||
|
S := P - 2;
|
||
|
P := S;
|
||
|
Next_Wide_Character (Input, P);
|
||
|
|
||
|
-- OK if at original pointer, else error
|
||
|
|
||
|
if P = Ptr then
|
||
|
Ptr := S;
|
||
|
return;
|
||
|
else
|
||
|
Bad;
|
||
|
end if;
|
||
|
end if;
|
||
|
|
||
|
P := P - 1;
|
||
|
end loop;
|
||
|
|
||
|
-- Falling through loop means more than 8 chars between the
|
||
|
-- enclosing brackets (or simply a missing left bracket)
|
||
|
|
||
|
Bad;
|
||
|
|
||
|
-- Here if no bracket sequence present
|
||
|
|
||
|
else
|
||
|
if Ptr = Input'First then
|
||
|
Past_End;
|
||
|
else
|
||
|
Ptr := Ptr - 1;
|
||
|
end if;
|
||
|
end if;
|
||
|
end Brackets;
|
||
|
|
||
|
-- Non-UTF-8/Brackets. These are the inefficient cases where we have to
|
||
|
-- go to the start of the string and skip forwards till Ptr matches.
|
||
|
|
||
|
else
|
||
|
Non_UTF_Brackets : declare
|
||
|
Discard : Wide_Character;
|
||
|
PtrS : Natural;
|
||
|
PtrP : Natural;
|
||
|
|
||
|
begin
|
||
|
PtrS := Input'First;
|
||
|
|
||
|
if Ptr <= PtrS then
|
||
|
Past_End;
|
||
|
end if;
|
||
|
|
||
|
loop
|
||
|
PtrP := PtrS;
|
||
|
Decode_Wide_Character (Input, PtrS, Discard);
|
||
|
|
||
|
if PtrS = Ptr then
|
||
|
Ptr := PtrP;
|
||
|
return;
|
||
|
|
||
|
elsif PtrS > Ptr then
|
||
|
Bad;
|
||
|
end if;
|
||
|
end loop;
|
||
|
|
||
|
exception
|
||
|
when Constraint_Error =>
|
||
|
Bad;
|
||
|
end Non_UTF_Brackets;
|
||
|
end if;
|
||
|
end Prev_Wide_Character;
|
||
|
|
||
|
------------------------------
|
||
|
-- Prev_Wide_Wide_Character --
|
||
|
------------------------------
|
||
|
|
||
|
procedure Prev_Wide_Wide_Character (Input : String; Ptr : in out Natural) is
|
||
|
begin
|
||
|
if Ptr > Input'Last + 1 then
|
||
|
Past_End;
|
||
|
end if;
|
||
|
|
||
|
-- Special efficient encoding for UTF-8 case
|
||
|
|
||
|
if Encoding_Method = WCEM_UTF8 then
|
||
|
UTF8 : declare
|
||
|
U : Unsigned_32;
|
||
|
|
||
|
procedure Getc;
|
||
|
pragma Inline (Getc);
|
||
|
-- Gets the character at Input (Ptr - 1) and returns code in U as
|
||
|
-- Unsigned_32 value. On return Ptr is decremented by one.
|
||
|
|
||
|
procedure Skip_UTF_Byte;
|
||
|
pragma Inline (Skip_UTF_Byte);
|
||
|
-- Checks that U is 2#10xxxxxx# and then calls Get
|
||
|
|
||
|
----------
|
||
|
-- Getc --
|
||
|
----------
|
||
|
|
||
|
procedure Getc is
|
||
|
begin
|
||
|
if Ptr <= Input'First then
|
||
|
Past_End;
|
||
|
else
|
||
|
Ptr := Ptr - 1;
|
||
|
U := Unsigned_32 (Character'Pos (Input (Ptr)));
|
||
|
end if;
|
||
|
end Getc;
|
||
|
|
||
|
-------------------
|
||
|
-- Skip_UTF_Byte --
|
||
|
-------------------
|
||
|
|
||
|
procedure Skip_UTF_Byte is
|
||
|
begin
|
||
|
if (U and 2#11000000#) = 2#10_000000# then
|
||
|
Getc;
|
||
|
else
|
||
|
Bad;
|
||
|
end if;
|
||
|
end Skip_UTF_Byte;
|
||
|
|
||
|
-- Start of processing for UTF-8 case
|
||
|
|
||
|
begin
|
||
|
-- 16#00_0000#-16#00_007F#: 0xxxxxxx
|
||
|
|
||
|
Getc;
|
||
|
|
||
|
if (U and 2#10000000#) = 2#00000000# then
|
||
|
return;
|
||
|
|
||
|
-- 16#00_0080#-16#00_07FF#: 110xxxxx 10xxxxxx
|
||
|
|
||
|
else
|
||
|
Skip_UTF_Byte;
|
||
|
|
||
|
if (U and 2#11100000#) = 2#110_00000# then
|
||
|
return;
|
||
|
|
||
|
-- 16#00_0800#-16#00_ffff#: 1110xxxx 10xxxxxx 10xxxxxx
|
||
|
|
||
|
else
|
||
|
Skip_UTF_Byte;
|
||
|
|
||
|
if (U and 2#11110000#) = 2#1110_0000# then
|
||
|
return;
|
||
|
|
||
|
-- 16#01_0000#-16#10_FFFF#: 11110xxx 10xxxxxx 10xxxxxx
|
||
|
-- 10xxxxxx
|
||
|
|
||
|
else
|
||
|
Skip_UTF_Byte;
|
||
|
|
||
|
if (U and 2#11111000#) = 2#11110_000# then
|
||
|
return;
|
||
|
|
||
|
-- 16#0020_0000#-16#03FF_FFFF#: 111110xx 10xxxxxx
|
||
|
-- 10xxxxxx 10xxxxxx
|
||
|
-- 10xxxxxx
|
||
|
|
||
|
else
|
||
|
Skip_UTF_Byte;
|
||
|
|
||
|
if (U and 2#11111100#) = 2#111110_00# then
|
||
|
return;
|
||
|
|
||
|
-- Any other code is invalid, note that this includes:
|
||
|
|
||
|
-- 16#0400_0000#-16#7FFF_FFFF#: 1111110x 10xxxxxx
|
||
|
-- 10xxxxxx 10xxxxxx
|
||
|
-- 10xxxxxx 10xxxxxx
|
||
|
|
||
|
-- since Wide_Wide_Character does not allow codes
|
||
|
-- greater than 16#03FF_FFFF#
|
||
|
|
||
|
else
|
||
|
Bad;
|
||
|
end if;
|
||
|
end if;
|
||
|
end if;
|
||
|
end if;
|
||
|
end if;
|
||
|
end UTF8;
|
||
|
|
||
|
-- Special efficient encoding for brackets case
|
||
|
|
||
|
elsif Encoding_Method = WCEM_Brackets then
|
||
|
Brackets : declare
|
||
|
P : Natural;
|
||
|
S : Natural;
|
||
|
|
||
|
begin
|
||
|
-- See if we have "] at end positions
|
||
|
|
||
|
if Ptr > Input'First + 1
|
||
|
and then Input (Ptr - 1) = ']'
|
||
|
and then Input (Ptr - 2) = '"'
|
||
|
then
|
||
|
P := Ptr - 2;
|
||
|
|
||
|
-- Loop back looking for [" at start
|
||
|
|
||
|
while P >= Ptr - 10 loop
|
||
|
if P <= Input'First + 1 then
|
||
|
Bad;
|
||
|
|
||
|
elsif Input (P - 1) = '"'
|
||
|
and then Input (P - 2) = '['
|
||
|
then
|
||
|
-- Found ["..."], scan forward to check it
|
||
|
|
||
|
S := P - 2;
|
||
|
P := S;
|
||
|
Next_Wide_Wide_Character (Input, P);
|
||
|
|
||
|
-- OK if at original pointer, else error
|
||
|
|
||
|
if P = Ptr then
|
||
|
Ptr := S;
|
||
|
return;
|
||
|
else
|
||
|
Bad;
|
||
|
end if;
|
||
|
end if;
|
||
|
|
||
|
P := P - 1;
|
||
|
end loop;
|
||
|
|
||
|
-- Falling through loop means more than 8 chars between the
|
||
|
-- enclosing brackets (or simply a missing left bracket)
|
||
|
|
||
|
Bad;
|
||
|
|
||
|
-- Here if no bracket sequence present
|
||
|
|
||
|
else
|
||
|
if Ptr = Input'First then
|
||
|
Past_End;
|
||
|
else
|
||
|
Ptr := Ptr - 1;
|
||
|
end if;
|
||
|
end if;
|
||
|
end Brackets;
|
||
|
|
||
|
-- Non-UTF-8/Brackets. These are the inefficient cases where we have to
|
||
|
-- go to the start of the string and skip forwards till Ptr matches.
|
||
|
|
||
|
else
|
||
|
Non_UTF8_Brackets : declare
|
||
|
Discard : Wide_Wide_Character;
|
||
|
PtrS : Natural;
|
||
|
PtrP : Natural;
|
||
|
|
||
|
begin
|
||
|
PtrS := Input'First;
|
||
|
|
||
|
if Ptr <= PtrS then
|
||
|
Past_End;
|
||
|
end if;
|
||
|
|
||
|
loop
|
||
|
PtrP := PtrS;
|
||
|
Decode_Wide_Wide_Character (Input, PtrS, Discard);
|
||
|
|
||
|
if PtrS = Ptr then
|
||
|
Ptr := PtrP;
|
||
|
return;
|
||
|
|
||
|
elsif PtrS > Ptr then
|
||
|
Bad;
|
||
|
end if;
|
||
|
end loop;
|
||
|
|
||
|
exception
|
||
|
when Constraint_Error =>
|
||
|
Bad;
|
||
|
end Non_UTF8_Brackets;
|
||
|
end if;
|
||
|
end Prev_Wide_Wide_Character;
|
||
|
|
||
|
--------------------------
|
||
|
-- Validate_Wide_String --
|
||
|
--------------------------
|
||
|
|
||
|
function Validate_Wide_String (S : String) return Boolean is
|
||
|
Ptr : Natural;
|
||
|
|
||
|
begin
|
||
|
Ptr := S'First;
|
||
|
while Ptr <= S'Last loop
|
||
|
Next_Wide_Character (S, Ptr);
|
||
|
end loop;
|
||
|
|
||
|
return True;
|
||
|
|
||
|
exception
|
||
|
when Constraint_Error =>
|
||
|
return False;
|
||
|
end Validate_Wide_String;
|
||
|
|
||
|
-------------------------------
|
||
|
-- Validate_Wide_Wide_String --
|
||
|
-------------------------------
|
||
|
|
||
|
function Validate_Wide_Wide_String (S : String) return Boolean is
|
||
|
Ptr : Natural;
|
||
|
|
||
|
begin
|
||
|
Ptr := S'First;
|
||
|
while Ptr <= S'Last loop
|
||
|
Next_Wide_Wide_Character (S, Ptr);
|
||
|
end loop;
|
||
|
|
||
|
return True;
|
||
|
|
||
|
exception
|
||
|
when Constraint_Error =>
|
||
|
return False;
|
||
|
end Validate_Wide_Wide_String;
|
||
|
|
||
|
end GNAT.Decode_String;
|