Здравствуйте TaXa, Вы писали:
TX>Помогите, пожалуйста: есть файл в кодировке Windows-1251, кириллицу в файле надо перекодировать в UTF-8. Приведите пример, пожалуйста.
В Windows есть библиотека mlang.dll. В ней есть все функции для перекодировки текста из одной кодировки в другую.
// CharsetDecoder.h: interface for the CCharsetDecoder class.
//
//////////////////////////////////////////////////////////////////////
#if !defined(AFX_CHARSETDECODER_H__7D78CFEC_96E6_4F82_8CC0_52C19AF153D6__INCLUDED_)
#define AFX_CHARSETDECODER_H__7D78CFEC_96E6_4F82_8CC0_52C19AF153D6__INCLUDED_
#if _MSC_VER > 1000
#pragma once
#endif // _MSC_VER > 1000
// Converts strings between a named charset and another encoding by way of
// the private Recode() helper. The charset is identified by name and kept
// in m_sCharSetName.
class CCharsetDecoder
{
public:
    // Creates a decoder for the charset with the given name; the name is
    // stored as passed, without validation.
    CCharsetDecoder(CString sCharSetName) : m_sCharSetName(sCharSetName) {}

    // Creates a decoder from a numeric code page (defined out of line).
    CCharsetDecoder(UINT CodePage);

    ~CCharsetDecoder() {}

    // Encodes a string from the default charset (e.g. 1251 for Russian)
    // into the charset this decoder was created for.
    CString Encode(CString sString);

    // Decodes a string from the charset this decoder was created for
    // (e.g. koi8-r, code page 20866) into the default charset.
    CString Decode(CString sString);

    // Name of the charset this decoder works with.
    CString m_sCharSetName;

private:
    // Shared implementation behind Encode (bEncode == true) and
    // Decode (bEncode == false).
    CString Recode(CString sString, bool bEncode);
};
#endif // !defined(AFX_CHARSETDECODER_H__7D78CFEC_96E6_4F82_8CC0_52C19AF153D6__INCLUDED_)
// CharsetDecoder.cpp: implementation of the CCharsetDecoder class.
//
//////////////////////////////////////////////////////////////////////
#include "stdafx.h"
#include "CharsetDecoder.h"
#include <mlang.h>
#include <atlbase.h>
#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
// Decodes sString by delegating to Recode() with the encode flag cleared.
// Returns whatever Recode() returns.
CString CCharsetDecoder::Decode(CString sString)
{
    const bool bEncode = false;
    return Recode(sString, bEncode);
}
// Encodes sString by delegating to Recode() with the encode flag set.
// Returns whatever Recode() returns.
CString CCharsetDecoder::Encode(CString sString)
{
    const bool bEncode = true;
    return Recode(sString, bEncode);
}
// Converts sString between the two encodings MLang associates with the
// charset named in m_sCharSetName.
//
// Parameters:
//   sString - text to convert; its bytes are handed to MLang as-is, so this
//             code assumes a narrow (ANSI/MBCS) CString build.
//   bEncode - true:  convert from info.uiCodePage to info.uiInternetEncoding;
//             false: convert from info.uiInternetEncoding to info.uiCodePage.
//
// Returns the converted string. If the MLang object cannot be created, the
// charset name is not recognised, or the conversion does not return S_OK,
// the input string is returned unchanged.
CString CCharsetDecoder::Recode(CString sString, bool bEncode)
{
    const UINT cbSource = sString.GetLength();
    if (cbSource == 0)
        return sString; // nothing to convert

    CComPtr<IMultiLanguage> pLang;
    HRESULT hr = CoCreateInstance(CLSID_CMultiLanguage, NULL, CLSCTX_INPROC_SERVER,
                                  IID_IMultiLanguage, (LPVOID*)&pLang);
    if (hr != S_OK)
        return sString;

    CComBSTR bsCharsetName = m_sCharSetName; // CString -> BSTR conversion happens here
    MIMECSETINFO info;
    hr = pLang->GetCharsetInfo(bsCharsetName, &info);
    if (hr != S_OK)
        return sString;

    // Pick the direction once instead of duplicating the whole call.
    const DWORD dwFrom = bEncode ? info.uiCodePage : info.uiInternetEncoding;
    const DWORD dwTo   = bEncode ? info.uiInternetEncoding : info.uiCodePage;
    BYTE* pSource = (BYTE*)LPCSTR(sString);

    // The converted text can be longer than the source (e.g. each Cyrillic
    // character of a single-byte code page takes two bytes in UTF-8), so the
    // destination must not be sized from the source length. Ask MLang for the
    // required byte count by passing a NULL destination.
    // NOTE(review): the NULL-destination size query is documented for
    // ConvertINetString; if this method rejects it, the generous fallback
    // below is used instead.
    DWORD dwProbeMode = 0;
    UINT cbProbeSource = cbSource;
    UINT cbDest = 0;
    hr = pLang->ConvertString(&dwProbeMode, dwFrom, dwTo,
                              pSource, &cbProbeSource, NULL, &cbDest);
    if (hr != S_OK || cbDest == 0)
        cbDest = cbSource * 4 + 16; // fallback upper bound for the output size

    CString sResult;
    DWORD dwMode = 0;           // fresh conversion state for the real pass
    UINT cbConsumed = cbSource; // in: bytes available, out: bytes processed
    BYTE* pDest = (BYTE*)sResult.GetBuffer(cbDest);
    hr = pLang->ConvertString(&dwMode, dwFrom, dwTo,
                              pSource, &cbConsumed, pDest, &cbDest);
    if (hr != S_OK)
    {
        sResult.ReleaseBuffer(0); // balance GetBuffer before discarding
        return sString;
    }

    // cbDest now holds the number of bytes actually written.
    sResult.ReleaseBuffer(cbDest);
    return sResult;
}
// Builds a decoder from a numeric code page: asks MLang for the code page
// description and stores its header charset name in m_sCharSetName.
// If the MLang object cannot be created or the code page lookup does not
// return S_OK, m_sCharSetName is left default-constructed.
// NOTE(review): wszHeaderCharset is the charset name used here; confirm it
// always maps back to the requested code page for the code pages in use.
CCharsetDecoder::CCharsetDecoder(UINT CodePage)
{
    CComPtr<IMultiLanguage> pMultiLang;
    const HRESULT hrCreate = CoCreateInstance(CLSID_CMultiLanguage, NULL, CLSCTX_INPROC_SERVER,
                                              IID_IMultiLanguage, (LPVOID*)&pMultiLang);
    if (hrCreate != S_OK)
        return;

    MIMECPINFO cpInfo;
    const HRESULT hrInfo = pMultiLang->GetCodePageInfo(CodePage, &cpInfo);
    if (hrInfo == S_OK)
        m_sCharSetName = cpInfo.wszHeaderCharset;
}