gordon's note infotech C/C++ std::string/std::wstring converting

Converting between std::string and std::wstring in C/C++

myconvert.h
#pragma once

// The declarations below use std::string / std::wstring, so include <string>
// here to keep this header usable no matter what the includer pulled in first.
#include <string>

// wchar_t* -> UTF8 string
// in: wide string terminated by a 0 element.
std::string wstring_to_string(const wchar_t* in);

// UTF8 char* -> wstring
// in: byte string terminated by a 0 byte.
std::wstring string_to_wstring(const char* in);

// UTF8 string -> wstring
// The result is assigned to out, replacing its previous contents.
void string_to_wstring(std::wstring &out, const std::string &in);

// UTF8 string -> wstring
// Same conversion as above, but the result is returned by value.
std::wstring string_to_wstring(const std::string &in);

myconvert.cpp
#include <string>
#include "myconvert.h"

// wchar_t* -> UTF8 string
//
// Converts a 0-terminated wide string to UTF-8.
//
// The input may hold UTF-16 code units (a high surrogate 0xD800-0xDBFF
// followed by a low surrogate 0xDC00-0xDFFF forms one code point) or, where
// wchar_t is wide enough, whole code points.
//
// Anything that cannot be encoded is replaced by U+FFFD (bytes EF BF BD):
// a high surrogate without a following low surrogate, a low surrogate without
// a preceding high surrogate, and any value above U+10FFFF.
//
// in:      wide string terminated by a 0 element; a null pointer yields "".
// returns: the UTF-8 encoding of the input.
std::string wstring_to_string(const wchar_t* in)
{
	std::string out;
	if (in == nullptr)
		return out;

	const unsigned int replacement = 0xfffd;
	while (*in != 0)
	{
		// Go through unsigned int so a negative wchar_t (where wchar_t is a
		// signed type) becomes a large value and is rejected below.
		unsigned int codepoint = static_cast<unsigned int>(*in);
		++in;

		if (codepoint >= 0xd800 && codepoint <= 0xdbff)
		{
			// High surrogate: only valid when immediately followed by a low
			// surrogate. *in is at worst the terminator here, so this read
			// never goes past the end of the string.
			const unsigned int low = static_cast<unsigned int>(*in);
			if (low >= 0xdc00 && low <= 0xdfff)
			{
				codepoint = 0x10000 + ((codepoint - 0xd800) << 10) + (low - 0xdc00);
				++in;
			}
			else
			{
				codepoint = replacement;
			}
		}
		else if ((codepoint >= 0xdc00 && codepoint <= 0xdfff) || codepoint > 0x10ffff)
		{
			// Lone low surrogate, or a value outside the Unicode range.
			codepoint = replacement;
		}

		if (codepoint <= 0x7f)
		{
			out.push_back(static_cast<char>(codepoint));
		}
		else if (codepoint <= 0x7ff)
		{
			out.push_back(static_cast<char>(0xc0 | ((codepoint >> 6) & 0x1f)));
			out.push_back(static_cast<char>(0x80 | (codepoint & 0x3f)));
		}
		else if (codepoint <= 0xffff)
		{
			out.push_back(static_cast<char>(0xe0 | ((codepoint >> 12) & 0x0f)));
			out.push_back(static_cast<char>(0x80 | ((codepoint >> 6) & 0x3f)));
			out.push_back(static_cast<char>(0x80 | (codepoint & 0x3f)));
		}
		else
		{
			out.push_back(static_cast<char>(0xf0 | ((codepoint >> 18) & 0x07)));
			out.push_back(static_cast<char>(0x80 | ((codepoint >> 12) & 0x3f)));
			out.push_back(static_cast<char>(0x80 | ((codepoint >> 6) & 0x3f)));
			out.push_back(static_cast<char>(0x80 | (codepoint & 0x3f)));
		}
	}
	return out;
}

// Decodes the UTF-8 bytes in [first, last) into a wide string.
//
// Shared by every string_to_wstring overload so they all decode identically.
// Malformed input produces U+FFFD for each offending lead byte: a stray
// continuation byte, an invalid lead byte, a truncated sequence, an overlong
// encoding, an encoded surrogate, or a value above U+10FFFF.
// Where wchar_t is 16 bits wide, code points above U+FFFF are written as a
// UTF-16 surrogate pair; otherwise each code point occupies one wchar_t.
static std::wstring utf8_range_to_wstring(const char* first, const char* last)
{
	std::wstring out;
	const unsigned int replacement = 0xfffd;

	while (first != last)
	{
		const unsigned char lead = static_cast<unsigned char>(*first);
		++first;

		unsigned int codepoint = replacement;
		unsigned int minimum = 0;   // smallest value this sequence length may encode
		int trailing = 0;           // continuation bytes still expected

		if (lead <= 0x7f)
		{
			codepoint = lead;
		}
		else if (lead >= 0xc0 && lead <= 0xdf)
		{
			codepoint = lead & 0x1f;
			trailing = 1;
			minimum = 0x80;
		}
		else if (lead >= 0xe0 && lead <= 0xef)
		{
			codepoint = lead & 0x0f;
			trailing = 2;
			minimum = 0x800;
		}
		else if (lead >= 0xf0 && lead <= 0xf7)
		{
			codepoint = lead & 0x07;
			trailing = 3;
			minimum = 0x10000;
		}
		// Otherwise: stray continuation byte (0x80-0xBF) or invalid lead byte
		// (0xF8-0xFF); codepoint stays at the replacement character.

		bool complete = true;
		for (; trailing > 0; --trailing)
		{
			if (first == last || (static_cast<unsigned char>(*first) & 0xc0) != 0x80)
			{
				// Truncated sequence. The offending byte is not consumed, so
				// it is decoded on its own in the next iteration.
				complete = false;
				break;
			}
			codepoint = (codepoint << 6) | (static_cast<unsigned char>(*first) & 0x3f);
			++first;
		}

		if (!complete
			|| codepoint < minimum                              // overlong encoding
			|| codepoint > 0x10ffff                             // beyond Unicode
			|| (codepoint >= 0xd800 && codepoint <= 0xdfff))    // encoded surrogate
		{
			codepoint = replacement;
		}

		if (sizeof(wchar_t) > 2 || codepoint <= 0xffff)
		{
			out.push_back(static_cast<wchar_t>(codepoint));
		}
		else
		{
			// UTF-16 surrogate pair: the 0x10000 offset must be removed before
			// the 20 remaining bits are split 10/10.
			const unsigned int offset = codepoint - 0x10000;
			out.push_back(static_cast<wchar_t>(0xd800 + (offset >> 10)));
			out.push_back(static_cast<wchar_t>(0xdc00 + (offset & 0x03ff)));
		}
	}
	return out;
}

// UTF8 char* -> wstring
//
// in:      UTF-8 byte string terminated by a 0 byte; a null pointer yields an
//          empty result.
// returns: the decoded wide string (see utf8_range_to_wstring for how
//          malformed input and 16-bit wchar_t are handled).
std::wstring string_to_wstring(const char* in)
{
	if (in == nullptr)
		return std::wstring();

	return utf8_range_to_wstring(in, in + std::char_traits<char>::length(in));
}

// UTF8 string -> wstring
//
// Decodes the whole of in, including any embedded 0 bytes, and assigns the
// result to out (its previous contents are discarded).
void string_to_wstring(std::wstring &out, const std::string &in)
{
	out = utf8_range_to_wstring(in.data(), in.data() + in.size());
}

// UTF8 string -> wstring
//
// Decodes the whole of in, including any embedded 0 bytes, and returns the
// result by value.
std::wstring string_to_wstring(const std::string &in)
{
	return utf8_range_to_wstring(in.data(), in.data() + in.size());
}

    #C++ #UTF-8 #char #wchar_t #wstring #convert

Leave a Reply

發佈留言必須填寫的電子郵件地址不會公開。 必填欄位標示為 *

Related Post