259 lines
8.9 KiB
Ada
259 lines
8.9 KiB
Ada
------------------------------------------------------------------------------
--                                                                          --
--                         GNAT RUN-TIME COMPONENTS                         --
--                                                                          --
--                   G N A T . E N C O D E _ S T R I N G                    --
--                                                                          --
--                                 B o d y                                  --
--                                                                          --
--          Copyright (C) 2007-2012, Free Software Foundation, Inc.         --
--                                                                          --
-- GNAT is free software;  you can  redistribute it  and/or modify it under --
-- terms of the  GNU General Public License as published  by the Free Soft- --
-- ware  Foundation;  either version 3,  or (at your option) any later ver- --
-- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
-- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
-- or FITNESS FOR A PARTICULAR PURPOSE.                                     --
--                                                                          --
-- As a special exception under Section 7 of GPL version 3, you are granted --
-- additional permissions described in the GCC Runtime Library Exception,   --
-- version 3.1, as published by the Free Software Foundation.               --
--                                                                          --
-- You should have received a copy of the GNU General Public License and    --
-- a copy of the GCC Runtime Library Exception along with this program;     --
-- see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see    --
-- <http://www.gnu.org/licenses/>.                                          --
--                                                                          --
-- GNAT was originally developed  by the GNAT team at  New York University. --
-- Extensive contributions were provided by Ada Core Technologies Inc.      --
--                                                                          --
------------------------------------------------------------------------------
|
|
|
|
with Interfaces; use Interfaces;
|
|
|
|
with System.WCh_Con; use System.WCh_Con;
|
|
with System.WCh_Cnv; use System.WCh_Cnv;
|
|
|
|
package body GNAT.Encode_String is
|
|
|
|
-----------------------
|
|
-- Local Subprograms --
|
|
-----------------------
|
|
|
|
procedure Past_End;
pragma No_Return (Past_End);
--  Report an attempt to store beyond the end of the result string by
--  raising Constraint_Error. Never returns.

procedure Bad;
pragma No_Return (Bad);
--  Report a character code that cannot be encoded by raising
--  Constraint_Error. Never returns.
|
|
|
|
---------
|
|
-- Bad --
|
|
---------
|
|
|
|
procedure Bad is
   Message : constant String :=
     "character cannot be encoded with given Encoding_Method";
   --  Text attached to the exception occurrence

begin
   raise Constraint_Error with Message;
end Bad;
|
|
|
|
------------------------
|
|
-- Encode_Wide_String --
|
|
------------------------
|
|
|
|
function Encode_Wide_String (S : Wide_String) return String is
   Per_Char : constant Natural := WC_Longest_Sequences (Encoding_Method);
   --  Entry of WC_Longest_Sequences for the selected Encoding_Method; used
   --  as the per-character factor when sizing the work buffer.

   Buffer : String (1 .. S'Length * Per_Char);
   --  Work area handed to the procedural variant

   Used : Natural;
   --  Number of characters of Buffer reported as filled

begin
   Encode_Wide_String (S => S, Result => Buffer, Length => Used);
   return Buffer (1 .. Used);
end Encode_Wide_String;
|
|
|
|
procedure Encode_Wide_String
  (S      : Wide_String;
   Result : out String;
   Length : out Natural)
is
   Ptr : Natural;
   --  Index in Result at which the next encoded character is stored

begin
   --  Ptr indexes Result, not S, so it must start at Result'First. Starting
   --  at S'First would misplace the output whenever the two strings have
   --  different lower bounds (for example when S is a slice).

   Ptr := Result'First;

   for J in S'Range loop
      Encode_Wide_Character (S (J), Result, Ptr);
   end loop;

   --  Ptr now designates the first unused position of Result

   Length := Ptr - Result'First;
end Encode_Wide_String;
|
|
|
|
-----------------------------
|
|
-- Encode_Wide_Wide_String --
|
|
-----------------------------
|
|
|
|
function Encode_Wide_Wide_String (S : Wide_Wide_String) return String is
   Per_Char : constant Natural := WC_Longest_Sequences (Encoding_Method);
   --  Entry of WC_Longest_Sequences for the selected Encoding_Method; used
   --  as the per-character factor when sizing the work buffer.

   Buffer : String (1 .. S'Length * Per_Char);
   --  Work area handed to the procedural variant

   Used : Natural;
   --  Number of characters of Buffer reported as filled

begin
   Encode_Wide_Wide_String (S => S, Result => Buffer, Length => Used);
   return Buffer (1 .. Used);
end Encode_Wide_Wide_String;
|
|
|
|
procedure Encode_Wide_Wide_String
  (S      : Wide_Wide_String;
   Result : out String;
   Length : out Natural)
is
   Ptr : Natural;
   --  Index in Result at which the next encoded character is stored

begin
   --  Ptr indexes Result, not S, so it must start at Result'First. Starting
   --  at S'First would misplace the output whenever the two strings have
   --  different lower bounds (for example when S is a slice).

   Ptr := Result'First;

   for J in S'Range loop
      Encode_Wide_Wide_Character (S (J), Result, Ptr);
   end loop;

   --  Ptr now designates the first unused position of Result

   Length := Ptr - Result'First;
end Encode_Wide_Wide_String;
|
|
|
|
---------------------------
|
|
-- Encode_Wide_Character --
|
|
---------------------------
|
|
|
|
procedure Encode_Wide_Character
  (Char   : Wide_Character;
   Result : in out String;
   Ptr    : in out Natural)
is
begin
   --  Widen Char to the Wide_Wide_Character at the same position and let
   --  the Wide_Wide_Character routine do the encoding.

   --  No local exception handler is used. Any Constraint_Error raised by
   --  the call below propagates unchanged, so a "past end of string"
   --  failure is not relabelled here as an unencodable character.

   Encode_Wide_Wide_Character
     (Wide_Wide_Character'Val (Wide_Character'Pos (Char)), Result, Ptr);
end Encode_Wide_Character;
|
|
|
|
--------------------------------
|
|
-- Encode_Wide_Wide_Character --
|
|
--------------------------------
|
|
|
|
procedure Encode_Wide_Wide_Character
  (Char   : Wide_Wide_Character;
   Result : in out String;
   Ptr    : in out Natural)
is
   U : Unsigned_32;
   --  Position (code) of Char; only set and used on the UTF-8 path

   Overrun : Boolean := False;
   --  Set by Out_Char just before it calls Past_End. The handler on the
   --  non-UTF-8 path uses it to tell "Result is full" apart from "code
   --  cannot be encoded", since both arrive as Constraint_Error.

   procedure Out_Char (C : Character);
   pragma Inline (Out_Char);
   --  Store C at Result (Ptr) and advance Ptr by one. Calls Past_End if Ptr
   --  is already beyond Result'Last. Also used as the actual for the
   --  instantiation of UTF_32_To_Char_Sequence below.

   --------------
   -- Out_Char --
   --------------

   procedure Out_Char (C : Character) is
   begin
      if Ptr > Result'Last then
         Overrun := True;
         Past_End;
      else
         Result (Ptr) := C;
         Ptr := Ptr + 1;
      end if;
   end Out_Char;

--  Start of processing for Encode_Wide_Wide_Character

begin
   --  Efficient code for UTF-8 case

   if Encoding_Method = WCEM_UTF8 then

      --  Note: for details of UTF8 encoding see RFC 3629

      U := Unsigned_32 (Wide_Wide_Character'Pos (Char));

      --  16#00_0000#-16#00_007F#: 0xxxxxxx

      if U <= 16#00_007F# then
         Out_Char (Character'Val (U));

      --  16#00_0080#-16#00_07FF#: 110xxxxx 10xxxxxx

      elsif U <= 16#00_07FF# then
         Out_Char (Character'Val (2#11000000# or Shift_Right (U, 6)));
         Out_Char (Character'Val (2#10000000# or (U and 2#00111111#)));

      --  16#00_0800#-16#00_FFFF#: 1110xxxx 10xxxxxx 10xxxxxx

      elsif U <= 16#00_FFFF# then
         Out_Char (Character'Val (2#11100000# or Shift_Right (U, 12)));
         Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 6)
                                                    and 2#00111111#)));
         Out_Char (Character'Val (2#10000000# or (U and 2#00111111#)));

      --  16#01_0000#-16#10_FFFF#: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx

      elsif U <= 16#10_FFFF# then
         Out_Char (Character'Val (2#11110000# or Shift_Right (U, 18)));
         Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 12)
                                                    and 2#00111111#)));
         Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 6)
                                                    and 2#00111111#)));
         Out_Char (Character'Val (2#10000000# or (U and 2#00111111#)));

      --  16#0020_0000#-16#03FF_FFFF#: 111110xx 10xxxxxx 10xxxxxx
      --                               10xxxxxx 10xxxxxx

      --  NOTE(review): because the previous test stops at 16#10_FFFF#,
      --  codes 16#11_0000#-16#1F_FFFF# are also encoded by this five-byte
      --  branch. Confirm that this is intended.

      elsif U <= 16#03FF_FFFF# then
         Out_Char (Character'Val (2#11111000# or Shift_Right (U, 24)));
         Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 18)
                                                    and 2#00111111#)));
         Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 12)
                                                    and 2#00111111#)));
         Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 6)
                                                    and 2#00111111#)));
         Out_Char (Character'Val (2#10000000# or (U and 2#00111111#)));

      --  All other codes are rejected as invalid. Note that this includes
      --  the range that would need the six-byte form, which is never
      --  generated here:

      --  16#0400_0000#-16#7FFF_FFFF#: 1111110x 10xxxxxx 10xxxxxx
      --                               10xxxxxx 10xxxxxx 10xxxxxx

      else
         Bad;
      end if;

   --  All encoding methods other than UTF-8

   else
      Non_UTF8 : declare
         procedure UTF_32_To_String is
           new UTF_32_To_Char_Sequence (Out_Char);
         --  Instantiate conversion procedure with above Out_Char routine

      begin
         UTF_32_To_String
           (UTF_32_Code (Wide_Wide_Character'Pos (Char)), Encoding_Method);

      exception
         when Constraint_Error =>

            --  A Constraint_Error reaching this point is either the one
            --  raised by Past_End from Out_Char (Result is full) or one
            --  raised by the conversion itself. Report the former with its
            --  own message rather than as an unencodable character.

            if Overrun then
               Past_End;
            else
               Bad;
            end if;
      end Non_UTF8;
   end if;
end Encode_Wide_Wide_Character;
|
|
|
|
--------------
|
|
-- Past_End --
|
|
--------------
|
|
|
|
procedure Past_End is
   Message : constant String := "past end of string";
   --  Text attached to the exception occurrence

begin
   raise Constraint_Error with Message;
end Past_End;
|
|
|
|
end GNAT.Encode_String;
|