/* -*- Mode:C++; c++-file-style:"gnu"; indent-tabs-mode:nil; -*- */
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 3 as
 * published by the Free Software Foundation;
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 * Author: Marco Guastella alias Vasta 
 * Web page:<www.ragnu.it> 
 * Email:<vasta@ragnu.it>
 * Date last update: 26/03/2024
 */

#include "vmp.h"

namespace vampiria { namespace vmp { namespace unicode {

// Formats `fmt` with its variadic arguments (through va_wrap) and stores the
// result in *pstr, replacing the previous content.
// Returns the size of *pstr after the assignment.
vmp_size str_write(vmp::str *pstr,const vmp_char *fmt,...)
{
    va_list args;
    va_start(args,fmt);
    vmp::str formatted=va_wrap(fmt,args);
    va_end(args);
    *pstr=formatted;
    return pstr->size();
}

// Formats `fmt` with its variadic arguments (through va_wrap) and appends the
// result to *pstr, keeping the previous content.
// Returns the size of *pstr after the append.
vmp_size str_cwrite(vmp::str *pstr,const vmp_char *fmt,...)
{
    va_list args;
    va_start(args,fmt);
    vmp::str formatted=va_wrap(fmt,args);
    va_end(args);
    *pstr += formatted;
    return pstr->size();
}

// Returns the lowercase form of `c` as reported by std::tolower.
// std::tolower requires its argument to be representable as unsigned char
// (or EOF); a plain char holding a negative value would be undefined
// behaviour, so the value is routed through unsigned char first.
vmp_char char_tolower(vmp_char c)
{
     return static_cast<vmp_char>(std::tolower(static_cast<unsigned char>(c)));
}

// Returns the uppercase form of `c` as reported by std::toupper.
// std::toupper requires its argument to be representable as unsigned char
// (or EOF); a plain char holding a negative value would be undefined
// behaviour, so the value is routed through unsigned char first.
vmp_char char_toupper(vmp_char c)
{
     return static_cast<vmp_char>(std::toupper(static_cast<unsigned char>(c)));
}

// Returns a copy of `str` in which every character has been passed through
// char_tolower.
vmp::str str_tolower(vmp::str str)
{
    vmp::str lowered="";
    for(auto ch : str)
        lowered += char_tolower(ch);
    return lowered;
}

// Returns a copy of `str` in which every character has been passed through
// char_toupper.
vmp::str str_toupper(vmp::str str)
{
    vmp::str raised="";
    for(auto ch : str)
        raised += char_toupper(ch);
    return raised;
}

// Splits `str` on every occurrence of `delimiter`.
// - An empty delimiter yields one single-character token per input character.
// - Otherwise empty tokens (leading, trailing or between adjacent delimiters)
//   are dropped, so only non-empty pieces are returned.
vmp::vector<vmp::str> str_split(vmp::str str,vmp::str delimiter)
{
    vmp::vector<vmp::str> tokens;
    vmp::str current="";
    const vmp_size dlen=delimiter.size();
    if(dlen == 0)
    {
        for(vmp_index k=0;k<str.size();k++)
        {
            current=str[k];
            tokens.push_back(current);
        }
        return tokens;
    }
    vmp_index pos=0;
    while(pos < str.size())
    {
        if(str.compare(pos,dlen,delimiter) != 0)
        {
            // Ordinary character: extend the token being built.
            current += str[pos];
            pos++;
            continue;
        }
        // Delimiter found: flush the token (if any) and jump past it.
        if(current.size() != 0)
            tokens.push_back(current);
        current="";
        pos += dlen;
    }
    if(current.size() != 0)
        tokens.push_back(current);
    return tokens;
}
// Concatenates the elements of `list`, inserting `token` between consecutive
// elements. An empty list yields an empty string.
vmp::str str_join(vmp::vector<vmp::str> list,vmp::str token)
{
    vmp::str joined="";
    for(vmp_index k=0;k<list.size();k++)
    {
        // The separator goes before every element except the first.
        if(k != 0)
            joined += token;
        joined += list[k];
    }
    return joined;
}

// Thin wrapper: forwards `byte` and the class name `type` to
// vmp::istype_wrap and returns its result unchanged.
// NOTE(review): the set of accepted `type` names is defined by
// vmp::istype_wrap ("digit" and "alnum" are used in this file) - confirm there.
vmp_bool byte_istype(vmp_byte byte,vmp::str type)
{
    return vmp::istype_wrap(byte,type);
}

// Returns bit number `n` of `byte` (0 = least significant) as 0 or 1.
// Bit positions above 7 return 0. Without the guard the shift count would be
// unbounded, and shifting the promoted int by its width or more is undefined
// behaviour.
// NOTE(review): assumes vmp_byte is an unsigned 8-bit type - confirm.
vmp_uint byte_nbit(vmp_byte byte,vmp_uint n)
{
    if(n > 7)
        return 0;
    return (byte >> n) & 0x01;
}

// Returns true when every byte of `str` satisfies byte_istype(byte,type).
// An empty string has no failing byte and therefore returns true.
vmp_bool str_istype_impl(vmp::str str,vmp::str type)
{
    for(auto ch : str)
    {
        if(!vmp::unicode::byte_istype(ch,type))
            return false;
    }
    return true;
}

// Checks that `str` is made only of characters of class `type`.
// The special type "real" accepts digit strings with at most one '.'
// separating an integer part and a fractional part; every other type is
// checked byte by byte through str_istype_impl.
vmp_bool str_istype(vmp::str str,vmp::str type)
{
    if(type != "real")
        return vmp::unicode::str_istype_impl(str,type);
    // str_split drops empty tokens, so inputs such as "1..5" or "1.2." would
    // come back as two tokens and be accepted; count the dots explicitly.
    vmp_size dots=0;
    for(vmp_index i=0;i<str.size();i++)
        if(str[i] == '.')
            dots++;
    if(dots > 1)
        return false;
    vmp::vector<vmp::str> numbers=vmp::unicode::str_split(str,".");
    if(numbers.size() == 1)
        return vmp::unicode::str_istype_impl(numbers[0],"digit");
    else if (numbers.size() == 2)
    {
        if(vmp::unicode::str_istype_impl(numbers[0],"digit"))
            if(!((numbers[1].size() == 0) || (numbers[1][0] == '-')))
                return vmp::unicode::str_istype_impl(numbers[1],"digit");
    }
    return false;
}

// Converts `istr` to an integer with vmp::atoi_wrap.
// Calls vmp::except when `istr` fails the "digit" check of str_istype.
vmp_int str_todigit(vmp::str istr)
{
    const vmp_bool numeric=vmp::unicode::str_istype(istr,"digit");
    if(!numeric)
        vmp::except("Invalid Conversion from string to int because '%s' not integer",istr.c_str());
    return vmp::atoi_wrap(istr);
}

// Converts `istr` to an integer (see str_todigit) and checks that it lies in
// the closed interval [min,max].
// Calls vmp::except when min > max or when the value falls outside the range.
vmp_int str_todigit_range(vmp::str istr,vmp_int min,vmp_int max)
{
    if(max < min)
        vmp::except("Min value %d not less max value %d in input",min,max);
    const vmp_int value=vmp::unicode::str_todigit(istr);
    const vmp_bool inside=(value >= min) && (value <= max);
    if(!inside)
        vmp::except("%s not integer in range [%d,%d]",istr.c_str(),min,max);
    return value;
}

// Converts `dstr` to a real number with vmp::atof_wrap.
// Calls vmp::except when `dstr` fails the "real" check of str_istype.
// The error text now says "to real"; it previously said "to int", copied
// from the integer converter.
vmp_real str_toreal(vmp::str dstr)
{
    if(!vmp::unicode::str_istype(dstr,"real"))
        vmp::except("Invalid Conversion from string to real because '%s' not real",dstr.c_str());
    return vmp::atof_wrap(dstr.c_str());
}

// Converts `dstr` to a real number (see str_toreal) and checks that it lies
// in the closed interval [min,max].
// Calls vmp::except when min > max or when the value falls outside the range.
vmp_real str_toreal_range(vmp::str dstr,vmp_real min,vmp_real max)
{
    if(min > max)
        vmp::except("Min value %f not less max value %f in input",min,max);
    const vmp_real value=vmp::unicode::str_toreal(dstr);
    const vmp_bool outside=(value < min) || (value > max);
    if(outside)
        vmp::except("%s not double in range [%f,%f]",dstr.c_str(),min,max);
    return value;
}

// Converts a hexadecimal string into the bytes it encodes, two digits per
// byte, high nibble first. A string with an odd number of digits is
// left-padded with one '0'. Upper and lower case digits are accepted.
// Calls vmp::except when a character is not a hexadecimal digit.
vmp::vector<vmp_byte> xstr_tobytes(vmp::str xstr)
{
    vmp::vector<vmp_byte> bytes;
    vmp::str digits;
    if((xstr.size() % 2) == 0)
        digits=xstr;
    else
        vmp::unicode::str_write(&digits,"0%s",xstr.c_str());
    vmp_byte pending=0x00;
    for(vmp_index pos=0;pos<digits.size();pos++)
    {
        const vmp_char ch=digits[pos];
        vmp_byte nibble=0x00;
        if((ch >= '0') && (ch <= '9'))
            nibble=static_cast<vmp_byte>(ch-'0');
        else if((ch >= 'a') && (ch <= 'f'))
            nibble=static_cast<vmp_byte>(ch-'a'+0x0A);
        else if((ch >= 'A') && (ch <= 'F'))
            nibble=static_cast<vmp_byte>(ch-'A'+0x0A);
        else
            vmp::except("String '%s' is not hexadecimal",xstr.c_str());
        if((pos % 2) == 0)
        {
            // Even position: remember the high nibble until its partner arrives.
            pending=static_cast<vmp_byte>(nibble << 4);
        }
        else
        {
            // Odd position: the byte is complete.
            bytes.push_back(static_cast<vmp_byte>(pending | nibble));
            pending=0x00;
        }
    }
    return bytes;
}

// Renders every byte of `bytes` as two uppercase hexadecimal digits and
// returns the concatenation.
vmp::str bytes_toxstr(vmp::vector<vmp_byte> bytes)
{
    vmp::str hex="";
    vmp_index pos=0;
    while(pos < bytes.size())
    {
        vmp::unicode::str_cwrite(&hex,"%02X",bytes[pos]);
        pos++;
    }
    return hex;
}

// Renders every byte of `bytes` as two uppercase hexadecimal digits, with
// `delimiter` written between consecutive bytes (not before the first).
vmp::str bytes_toxstr_hm(vmp::vector<vmp_byte> bytes,vmp::str delimiter)
{
    vmp::str hex="";
    for(vmp_index pos=0;pos<bytes.size();pos++)
    {
        const vmp_bool first=(pos == 0);
        if(!first)
            vmp::unicode::str_cwrite(&hex,"%s%02X",delimiter.c_str(),bytes[pos]);
        else
            vmp::unicode::str_cwrite(&hex,"%02X",bytes[pos]);
    }
    return hex;
}

// Returns the hexadecimal representation of `str`, produced by writing the
// string into a vmp::Buf and reading the buffer back with read_xstr_hm
// using `delimiter`.
// NOTE(review): buf.index() is presumably what rewinds the read/write
// position before reading - confirm against vmp::Buf.
vmp::str str_toxstr(vmp::str str,vmp::str delimiter)
{
    vmp::Buf buf;
    buf.write_str(str);
    buf.index();
    return buf.read_xstr_hm(buf.size(),delimiter);
}

// Converts a hexadecimal string back to the string it encodes.
// With a non-empty `delimiter` the input is split on it first; a one-digit
// group is left-padded with '0' and a group longer than two digits is
// rejected. The digits are then written into a vmp::Buf as hex and read back
// as a string.
// Calls vmp::except when the input is not valid hexadecimal.
vmp::str xstr_tostr(vmp::str xstr,vmp::str delimiter)
{
    vmp::str compact=xstr;
    if(delimiter != "")
    {
        vmp::vector<vmp::str> groups=vmp::unicode::str_split(xstr,delimiter);
        for(vmp_index k=0;k<groups.size();k++)
        {
            const vmp_size width=groups[k].size();
            if(width > 2)
                vmp::except("String '%s' is not hexadecimal(delimiter = \'%s\')",xstr.c_str(),delimiter.c_str());
            else if(width == 1)
                vmp::unicode::str_write(&groups[k],"0%s",groups[k].c_str());
            else if(width == 0)
                vmp::unicode::str_write(&groups[k],"00");
        }
        compact=vmp::unicode::str_join(groups,"");
    }
    vmp::Buf buf;
    try
    {
        buf.write_xstr(compact);
    }
    catch(vmp::exception &)
    {
        // Report the failure in terms of the caller's original input.
        vmp::except("String '%s' is not hexadecimal(delimiter = \'%s\')",xstr.c_str(),delimiter.c_str());
    }
    buf.index();
    return buf.read_str(buf.size());
}

// Returns the substring of `str` starting at `pos` and at most `len`
// characters long.
// Calls vmp::except when `pos` lies beyond the end of the string.
vmp::str str_sub(vmp::str str,vmp_index pos,vmp_size len)
{
    // substr reports an out-of-range start with std::out_of_range, while the
    // handler below only names vmp::exception. Check explicitly so the
    // failure is always reported through the vmp error path.
    // NOTE(review): if vmp::exception already covers std::out_of_range this
    // guard only changes the message text - confirm.
    if(pos > str.size())
        vmp::except("Substring start position is beyond the end of the string");
    vmp::str ret;
    try
    {
        ret=str.substr((vmp::str_size)pos,(vmp::str_size)len);
    }
    catch(vmp::exception &x)
    {
        vmp::except_s(x.what());
    }
    return ret;
}

// Returns true when `sub` occurs somewhere inside `str`.
vmp_bool str_findsub(vmp::str str,vmp::str sub)
{
    return str.find(sub) != vmp::str_npos;
}

// Returns `text` with every occurrence of `find` replaced by `sub`.
// The previous split+join implementation dropped empty tokens, so leading,
// trailing and adjacent occurrences of `find` were deleted instead of
// replaced (e.g. replacing "x" with "y" in "xax" produced "a", not "yay").
// An empty `find` keeps the historical behaviour: `sub` is inserted between
// every pair of characters.
vmp::str str_replace(vmp::str text,vmp::str find,vmp::str sub)
{
    if(find.size() == 0)
    {
        vmp::vector<vmp::str> vtext=vmp::unicode::str_split(text,find);
        return vmp::unicode::str_join(vtext,sub);
    }
    vmp::str ret="";
    vmp::str_size pos=0;
    vmp::str_size hit=text.find(find,pos);
    while(hit != vmp::str_npos)
    {
        // Copy the untouched stretch, then the replacement.
        ret.append(text,pos,hit-pos);
        ret += sub;
        pos=hit+find.size();
        hit=text.find(find,pos);
    }
    // Tail after the last occurrence (possibly empty).
    ret.append(text,pos,vmp::str_npos);
    return ret;
}

// Returns true when the two strings have the same length and
// vmp::strncasecmp_wrap reports them equal over that length.
vmp_bool str_casecmp(vmp::str str1,vmp::str str2)
{
    const vmp_size len=str1.size();
    if(len != str2.size())
        return false;
    return vmp::strncasecmp_wrap(str1.c_str(),str2.c_str(),len) == 0;
}

// Normalizes `str` with str_format and then removes the space characters
// found at the beginning and at the end of the result.
vmp::str str_trim(vmp::str str)
{
    vmp::str normalized=vmp::unicode::str_format(str);
    vmp_index first=0;
    vmp_size end=normalized.size();
    // Skip leading spaces.
    while((first < end) && (normalized[first] == ' '))
        first++;
    // Drop trailing spaces.
    while((end > first) && (normalized[end-1] == ' '))
        end--;
    return str_sub(normalized,first,end-first);
}

// Returns true when `str` is non-empty and every character lies in the
// printable, non-space ASCII range 0x21..0x7E.
// The upper bound previously used ">= 0x7E", which rejected '~' (0x7E)
// although str_format_end in this file treats it as printable.
vmp_bool str_isword(vmp::str str)
{
    if(str.size() == 0)
        return false;
    for(vmp_index i=0;i<str.size();i++)
        if((str[i] <= 0x20) || (str[i] > 0x7E))
            return false;
    return true;
}

// Returns `content` with every run of characters outside the printable,
// non-space ASCII range 0x21..0x7E collapsed into a single space. Such runs
// at the very beginning are dropped; a run at the end leaves one trailing
// space.
// The upper bound previously used ">= 0x7E", which turned '~' (0x7E) into a
// space although str_format_end in this file treats it as printable.
vmp::str str_format(vmp::str content)
{
    vmp::str ret="";
    for(vmp_index i=0;i<content.size();i++)
    {
        const vmp_bool printable=(content[i] > 0x20) && (content[i] <= 0x7E);
        if(printable)
            ret += content[i];
        else if((ret.size() != 0) && (ret[ret.size()-1] != 0x20))
            ret += " ";   // first separator after a printable character
    }
    return ret;
}

// Normalizes `content` with str_format and re-flows its words: words are
// joined with single spaces, and a newline is emitted instead of the space
// once the running line length has reached `maxline`.
vmp::str str_format_maxline(vmp::str content,vmp_size maxline)
{
    vmp::str out="";
    vmp::str normalized=vmp::unicode::str_format(content);
    vmp::vector<vmp::str> words=vmp::unicode::str_split(normalized," ");
    vmp_index used=0;   // characters written on the current line
    for(vmp_index k=0;k<words.size();k++)
    {
        if(used >= maxline)
        {
            out += "\n";
            used=0;
        }
        else if(k != 0)
        {
            out += " ";
            used++;
        }
        out += words[k];
        used += words[k].size();
    }
    return out;
}

// Returns `content` truncated after its last character in the printable,
// non-space ASCII range 0x21..0x7E; returns an empty string when no such
// character exists.
vmp::str str_format_end(vmp::str content)
{
    vmp_size end=content.size();
    while(end > 0)
    {
        if((content[end-1] > 0x20) && (content[end-1] <= 0x7E))
            break;
        end--;
    }
    return content.substr(0,end);
}

// Returns the characters of `str`, in their original order, that also appear
// in `clist`; every other character is discarded.
vmp::str str_extract_char(vmp::str str,vmp::str clist)
{
    vmp::str kept="";
    for(auto ch : str)
    {
        if(clist.find(ch) != vmp::str_npos)
            kept += ch;
    }
    return kept;
}

// Returns the elements of `svect` with repeated strings removed, keeping the
// first occurrence of each one in its original position. A vmp::Table keyed
// by the string records which values have already been emitted.
vmp::vector<vmp::str> str_remove_duplex(vmp::vector<vmp::str> svect)
{
    vmp::Table<vmp::str,void *> seen;
    vmp::vector<vmp::str> unique;
    void *unused;
    for(vmp_index k=0;k<svect.size();k++)
    {
        if(seen.search(svect[k],&unused))
            continue;
        seen.insert(svect[k],0);
        unique.push_back(svect[k]);
    }
    return unique;
}

// Collects, in order, the text of every match of `regex` found in `str`.
// If the last collected match is the empty string it is removed from the
// result.
vmp::vector<vmp::str> str_regex_iterator(vmp::str str,std::regex regex)
{
    vmp::vector<vmp::str> found;
    const std::sregex_iterator last;
    for(std::sregex_iterator it(str.begin(),str.end(),regex);it != last;++it)
        found.push_back(it->str());
    const vmp_size count=found.size();
    if((count > 0) && (found[count-1] == ""))
        found.pop_back();
    return found;
}

// Compiles `str_regex` as an ECMAScript regular expression (case-insensitive
// when `icase` is set) and returns the matches found in `str` through
// str_regex_iterator.
vmp::vector<vmp::str> str_regex(vmp::str str,vmp::str str_regex,vmp_bool icase)
{
    std::regex::flag_type flags=std::regex::ECMAScript;
    if (icase)
        flags |= std::regex::icase;
    std::regex compiled(str_regex,flags);
    return vmp::unicode::str_regex_iterator(str,compiled);
}

// Returns true when searching `str` with `str_regex` yields exactly one
// match and that match is the whole string.
vmp_bool str_regex_matching(vmp::str str,vmp::str str_regex)
{
    vmp::vector<vmp::str> matches=vmp::unicode::str_regex(str,str_regex);
    return (matches.size() == 1) && (matches[0] == str);
}

// Thin wrapper: forwards `value` and `values` to vmp::invector<vmp::str>
// and returns its result unchanged.
// NOTE(review): presumably true when `value` is an element of `values` -
// confirm against vmp::invector.
vmp_bool str_invector(vmp::str value,vmp::vector<vmp::str> values)
{
    return vmp::invector<vmp::str>(value,values);
}

// Splits a command line into tokens with a small shell-like state machine.
// States: ' ' outside quotes, 's' inside single quotes, 'd' inside double
// quotes, 'e' right after a backslash (the next character is taken
// literally and the previous state is restored).
// Outside quotes, whitespace ends the current token and only the characters
// of the word class, quotes and backslash are accepted.
// Calls vmp::except on an unexpected character, an unterminated quote or a
// trailing backslash. Empty tokens are never emitted.
//
// The two character classes are compiled once before the loop; they used to
// be recompiled for every input character through str_regex_matching. For
// these single character-class patterns, applied to a string of at most one
// character, std::regex_match gives the same answer.
vmp::vector<vmp::str> shlex_split(vmp::str args)
{
    vmp::vector<vmp::str> ret;
    vmp::str token="";
    vmp_byte state=' ',escape=' ';
    vmp::str nextchar;
    const std::regex whitespace("[\\s\\t\\r\\n]",std::regex::ECMAScript);
    const std::regex wordchar("[\\w@%+=:,\\./\\-]",std::regex::ECMAScript);
    for(vmp_index i=0;i<args.size();i++)
    {
        vmp::unicode::str_write(&nextchar,"%c",args[i]);
        switch(state)
        {
            case ' '://unquoted
               if(std::regex_match(nextchar,whitespace))
               {
                   if(token != "")
                       ret.push_back(token);
                   token="";
               }
               else if(std::regex_match(nextchar,wordchar))
                   token += nextchar;
               else if(nextchar == "\\")
               {
                   escape=state;
                   state='e';
               }
               else if(nextchar == "'")
                   state='s';
               else if(nextchar == "\"")
                   state='d';
               else
                   vmp::except("Args '%s' syntax error",args.c_str());
               break;
            case 'e'://escape: take the character literally, resume previous state
               token += nextchar;
               state=escape;
               break;
            case 's'://single quoted
               if(nextchar == "'")
                   state=' ';
               else if(nextchar == "\\")
               {
                   escape=state;
                   state='e';
               }
               else
                   token += nextchar;
               break;
            case 'd'://double quoted
               if(nextchar == "\"")
                   state=' ';
               else if(nextchar == "\\")
               {
                   escape=state;
                   state='e';
               }
               else
                   token += nextchar;
               break;
        }
    }
    // Ending inside a quote or right after a backslash is an error.
    if(state != ' ')
        vmp::except("Args %s syntax error",args.c_str());
    if(token != "")
        ret.push_back(token);
    return ret;
}

// Builds a command line from `vargs`, separating arguments with one space.
// An argument made only of word characters is emitted as is; any other
// argument is wrapped in double quotes, with '"' and '\' escaped by a
// backslash so that shlex_split reads the same argument back.
// Empty arguments are skipped.
//
// Fixes over the previous version:
// - `value` was not reset for an empty argument, so the previous argument
//   was emitted a second time;
// - quotes were escaped through str_replace, which drops leading, trailing
//   and adjacent occurrences, and backslashes were not escaped at all.
vmp::str shlex_join(vmp::vector<vmp::str> vargs)
{
    vmp::str ret="";
    for(vmp_index i=0;i<vargs.size();i++)
    {
        const vmp::str &arg=vargs[i];
        if(arg.size() == 0)
            continue;
        vmp::str value;
        if(vmp::unicode::str_regex_matching(arg,"[\\w@%+=:,\\./\\-]+"))
            value=arg;
        else
        {
            value="\"";
            for(vmp_index k=0;k<arg.size();k++)
            {
                // Both characters are special inside double quotes for shlex_split.
                if((arg[k] == '"') || (arg[k] == '\\'))
                    value += '\\';
                value += arg[k];
            }
            value += "\"";
        }
        if(ret.size() != 0)
            ret += " ";
        ret += value;
    }
    return ret;
}

// Parses a comma separated list of index expressions and returns the indexes
// they denote, passed through vmp::sort before returning.
// Each element (trimmed with str_trim) may be:
//   "*"    every index from minvalue to maxvalue
//   "N"    the single index N
//   "A-B"  the indexes from A to B
//   "A-"   the indexes from A to maxvalue
//   "-B"   the indexes from minvalue to B
// Every number is validated against [minvalue,maxvalue] by str_todigit_range.
// Calls vmp::except when minvalue > maxvalue or when an element is malformed
// or out of range; element errors are re-raised as "Syntax error '...' (...)".
// NOTE(review): a reversed interval such as "5-3" passes the checks and adds
// nothing, because the fill loop does not run - confirm this is intended.
// NOTE(review): the meaning of the two boolean arguments of vmp::sort is not
// visible from this file.
vmp::vector<vmp_index> str_toindex_list(vmp::str istr,vmp_index minvalue,vmp_index maxvalue)
{
    vmp::vector<vmp_index> ret;
    if(minvalue > maxvalue)
        vmp::except("minvalue %d not less maxvalue %d in input",minvalue,maxvalue);
    vmp::vector<vmp::str> split=vmp::unicode::str_split(istr,","),split2;
    vmp::str value;
    vmp_index min,max;
    for(vmp_index i=0;i<split.size();i++)
    {
        value=vmp::unicode::str_trim(split[i]);
        try
        {
            if(value == "*")
            {
                // Wildcard: the whole allowed interval.
                min=minvalue;
                max=maxvalue;
            }
            else if(vmp::unicode::str_regex_matching(value,"[0-9]+"))
            {
                // Single index.
                min=(vmp_index)vmp::unicode::str_todigit_range(value,minvalue,maxvalue);
                max=min;
            }
            else if(vmp::unicode::str_regex_matching(value,"[0-9]+\\-[0-9]+"))
            {
                // Closed interval "A-B".
                split2=vmp::unicode::str_split(value,"-");
                min=(vmp_index)vmp::unicode::str_todigit_range(split2[0],minvalue,maxvalue);
                max=(vmp_index)vmp::unicode::str_todigit_range(split2[1],minvalue,maxvalue);
            }
            else if(vmp::unicode::str_regex_matching(value,"[0-9]+\\-"))
            {
                // Open-ended "A-": strip the trailing '-' and run up to maxvalue.
                value=vmp::unicode::str_sub(value,0,value.size()-1);
                min=(vmp_index)vmp::unicode::str_todigit_range(value,minvalue,maxvalue);
                max=maxvalue;
            }
            else if(vmp::unicode::str_regex_matching(value,"\\-[0-9]+"))
            {
                // Open-started "-B": strip the leading '-' and start from minvalue.
                value=vmp::unicode::str_sub(value,1,value.size()-1);
                min=minvalue;
                max=(vmp_index)vmp::unicode::str_todigit_range(value,minvalue,maxvalue);
            }
            else
                vmp::except("");
            if((min < minvalue) || (max > maxvalue))
                vmp::except("interval range (%u-%u)",minvalue,maxvalue);
            for(vmp_index j=min;j<=max;j++)
                ret.push_back(j);
        }
        catch(vmp::exception &x)
        {
            // Re-raise with the offending element in the message.
            vmp::except("Syntax error '%s' (%s)",value.c_str(),x.what());
        }
    }
    return vmp::sort<vmp_index>(ret,false,false);
}

// Default constructor: starts with empty content and no tokenizer cursor
// (see reset()).
Strtok::Strtok()
{
    reset();
}

// Constructs a tokenizer over `content`.
// set() performs its own reset(), so the explicit reset() here is redundant
// but harmless.
Strtok::Strtok(vmp::str content)
{
    reset();
    set(content);
}

// Destructor: clears the content and the tokenizer cursor through reset().
Strtok::~Strtok()
{
    reset();
}

// Clears the stored content and sets the cursor to null, so the next call
// to next() starts a fresh tokenization.
void Strtok::reset()
{
    content_="";
    saveptr_=0;
}

// Replaces the text to tokenize with `content` and restarts tokenization
// from its beginning.
void Strtok::set(vmp::str content)
{
    reset();
    content_=content;
}

// Returns the next token of the stored content, using the characters of
// `delim` as separators, or an empty string when vmp::strtok_r_wrap returns
// no token. The first call (null cursor) starts from the beginning of the
// content; later calls continue from the cursor kept in saveptr_.
//
// NOTE(review): vmp::strtok_r_wrap is assumed to write terminators into the
// buffer, as strtok_r does. The buffer is therefore taken from the
// non-const operator[] instead of casting away the const of c_str(), whose
// characters must not be modified.
vmp::str Strtok::next(vmp::str delim)
{
    vmp_char *ret;
    if(saveptr_ == 0)
        ret=vmp::strtok_r_wrap(&content_[0],delim.c_str(),&saveptr_);
    else
        ret=vmp::strtok_r_wrap(0,delim.c_str(),&saveptr_);
    if(ret == 0)
        return "";
    vmp::str value;
    value=ret;
    return value;
}

// Returns the character at the tokenizer cursor without consuming it, or
// '\0' when there is no cursor yet or the cursor sits on the terminator.
vmp_char Strtok::get_char()
{
    // A cursor on the terminator already reads as '\0', so only the null
    // cursor needs a separate answer.
    return (saveptr_ != 0) ? saveptr_[0] : '\0';
}

// Advances the tokenizer cursor by up to `n` characters, stopping early at
// the terminator. Does nothing when there is no cursor yet.
void Strtok::jump_chars(vmp_index n)
{
    if(saveptr_ == 0)
        return;
    for(;(n > 0) && (*saveptr_ != '\0');n--)
        saveptr_++;
}

// Encodes `src` in base64 using vmp::unicode::b64_table.
// Input is consumed in groups of three bytes, each producing four symbols.
// A final group of one or two bytes is zero-filled, produces two or three
// symbols, and is completed with '=' padding up to four characters.
vmp::str b64_encode(vmp::str src)
{
    vmp::str out="";
    const vmp_size total=src.size();
    vmp_index pos=0;
    while(pos < total)
    {
        // Gather up to three input bytes; missing ones stay zero.
        vmp_byte in[3]={0,0,0};
        vmp_index got=0;
        while((got < 3) && (pos < total))
            in[got++]=src[pos++];
        // Spread the 24 bits over four 6-bit values.
        vmp_byte sextet[4];
        sextet[0]=(in[0] & 0xfc) >> 2;
        sextet[1]=((in[0] & 0x03) << 4) + ((in[1] & 0xf0) >> 4);
        sextet[2]=((in[1] & 0x0f) << 2) + ((in[2] & 0xc0) >> 6);
        sextet[3]=in[2] & 0x3f;
        // `got` input bytes carry got+1 meaningful symbols.
        for(vmp_index k=0;k<got+1;k++)
            vmp::unicode::str_cwrite(&out,"%c",vmp::unicode::b64_table[sextet[k]]);
        // Pad a short final group.
        for(vmp_index k=got;k<3;k++)
            vmp::unicode::str_cwrite(&out,"=");
    }
    return out;
}

// Decodes the base64 string `enc` using b64_table.
// Decoding stops at the first '=' (padding). Every other character must be
// alphanumeric, '+' or '/'; otherwise vmp::except is called.
// Symbols are processed in groups of four, each yielding three bytes. A
// final group of j symbols yields j-1 bytes.
//
// Decoded bytes are appended to the result directly. They used to go through
// a "%c" format string, which may drop 0x00 bytes if the formatter builds
// its result from a C string.
// NOTE(review): that depends on va_wrap, which is not visible here; the
// direct append is correct either way.
vmp::str b64_decode(vmp::str enc)
{
    vmp::str src="";
    vmp_index j=0,z,l;
    vmp_byte buf[3],tmp[4];
    for(vmp_index i=0;i<enc.size();i++)
    {
        if (enc[i] == '=')
            break;
        if (!(vmp::unicode::byte_istype(enc[i],"alnum") || enc[i] == '+'  || enc[i] == '/' ))
            vmp::except("vmp::unicode::b64_decode(enc=%s) encoded input string bad value",enc.c_str());
        tmp[j++]=enc[i];
        if(j == 4)
        {
            for (j = 0;j < 4;j++)
            {
                // Replace the symbol with its index in b64_table.
                for (z = 0; z < 64;z++)
                {
                    if (tmp[j] == b64_table[z])
                    {
                        tmp[j] = z;
                        break;
                    }
                }
            }
            buf[0] = (tmp[0] << 2) + ((tmp[1] & 0x30) >> 4);
            buf[1] = ((tmp[1] & 0xf) << 4) + ((tmp[2] & 0x3c) >> 2);
            buf[2] = ((tmp[2] & 0x3) << 6) + tmp[3];
            for (z = 0;z < 3;z++)
                src += static_cast<vmp_char>(buf[z]);
            j=0;
        }
    }
    if (j > 0)
    {
        // Incomplete final group: zero-fill the missing symbols.
        for (z = j; z < 4;z++)
            tmp[z] = '\0';
        for (z = 0; z < 4;z++)
        {
            // Replace the symbol with its index in b64_table.
            for (l = 0; l < 64;l++)
            {
                if (tmp[z] == b64_table[l])
                {
                    tmp[z] = l;
                    break;
                }
            }
        }
        buf[0] = (tmp[0] << 2) + ((tmp[1] & 0x30) >> 4);
        buf[1] = ((tmp[1] & 0xf) << 4) + ((tmp[2] & 0x3c) >> 2);
        buf[2] = ((tmp[2] & 0x3) << 6) + tmp[3];
        // j symbols carry j-1 complete bytes.
        for (z = 0; (z < j - 1);z++)
            src += static_cast<vmp_char>(buf[z]);
    }
    return src;
}

// Returns true when `src` is a well-formed UTF-8 byte sequence.
// Besides the lead/continuation byte structure, this rejects the ill-formed
// sequences the previous version accepted: overlong encodings (lead bytes
// 0xC0/0xC1, 0xE0 followed by 0x80..0x9F, 0xF0 followed by 0x80..0x8F),
// lead bytes 0xF5..0xF7 and code points above U+10FFFF (0xF4 followed by
// 0x90..0xBF). UTF-16 surrogates (0xED followed by 0xA0..0xBF) are still
// rejected. An empty string is valid.
// NOTE(review): assumes vmp_byte is an unsigned 8-bit type - confirm.
vmp_bool utf8_check(vmp::str src)
{
    const vmp_size len=src.size();
    vmp_index i=0;
    while(i < len)
    {
        const vmp_byte lead=static_cast<vmp_byte>(src[i]);
        vmp_size extra;            // continuation bytes expected after `lead`
        vmp_byte lo=0x80,hi=0xBF;  // allowed range of the first continuation byte
        if(lead <= 0x7F)
            extra=0;
        else if((lead >= 0xC2) && (lead <= 0xDF))
            extra=1;
        else if((lead & 0xF0) == 0xE0)
        {
            extra=2;
            if(lead == 0xE0)
                lo=0xA0;           // below this the encoding is overlong
            else if(lead == 0xED)
                hi=0x9F;           // above this lie the UTF-16 surrogates
        }
        else if((lead >= 0xF0) && (lead <= 0xF4))
        {
            extra=3;
            if(lead == 0xF0)
                lo=0x90;           // below this the encoding is overlong
            else if(lead == 0xF4)
                hi=0x8F;           // above this the code point exceeds U+10FFFF
        }
        else
            return false;          // stray continuation byte or invalid lead
        if(extra > (len-i-1))
            return false;          // sequence truncated by end of input
        for(vmp_index k=1;k<=extra;k++)
        {
            const vmp_byte cont=static_cast<vmp_byte>(src[i+k]);
            if((cont < lo) || (cont > hi))
                return false;
            // Only the first continuation byte has a restricted range.
            lo=0x80;
            hi=0xBF;
        }
        i += extra+1;
    }
    return true;
}

}}}

