RegExp
От: Anton Batenev Россия https://github.com/abbat
Дата: 31.08.05 16:48
Оценка: 16 (3)
Hello, All

На форумах RSDN частенько пролетает тема про регулярные выражения в стиле "чтобы не таскать за собой PCRE / boost". Как известно, в Windows Scripting Host есть свой класс для работы с регулярными выражениями. Он идет в комплекте с Windows Scripting Host, начиная с версии 5.5 (для тех, у кого установлен IE 6.0 и выше, это вообще не имеет значения, т.к. он уже в него включен). Хоть реализация и проста, но не так очевидна. Представляю свою реализацию. В коде используется самописный враппер для типа BSTR — BSTRString — его реализацию несложно организовать самому так, как угодно душе реализатора.


regexp.h

#pragma once
//---------------------------------------------------------------------------

// IRegExp: regular-expression interface derived from IDispatch.
// It has the same members as IRegExp2 except that it lacks the Multiline
// property and its Replace takes the replacement as a BSTR rather than a VARIANT.
// NOTE(review): calls go through the vtable, so the declaration order presumably
// has to match the component's own interface definition - do not reorder.
interface IRegExp : public IDispatch
{
    public:

        // Pattern property: reads into *pPattern / assigns from pPattern.
        virtual HRESULT STDMETHODCALLTYPE get_Pattern    (LPBSTR pPattern)                                           = 0;
        virtual HRESULT STDMETHODCALLTYPE set_Pattern    (BSTR   pPattern)                                           = 0;
        
        // IgnoreCase property (VARIANT_BOOL flag).
        virtual HRESULT STDMETHODCALLTYPE get_IgnoreCase (VARIANT_BOOL* pIgnoreCase)                                 = 0;
        virtual HRESULT STDMETHODCALLTYPE set_IgnoreCase (VARIANT_BOOL  pIgnoreCase)                                 = 0;
        
        // Global property (VARIANT_BOOL flag).
        virtual HRESULT STDMETHODCALLTYPE get_Global     (VARIANT_BOOL* pGlobal)                                     = 0;
        virtual HRESULT STDMETHODCALLTYPE set_Global     (VARIANT_BOOL  pGlobal)                                     = 0;
        
        // Runs the expression over sourceString; the result object is returned as an IDispatch in *ppMatches.
        virtual HRESULT STDMETHODCALLTYPE Execute        (BSTR sourceString, LPDISPATCH* ppMatches)                  = 0;
        
        // Writes a VARIANT_BOOL result for sourceString into *pMatch.
        virtual HRESULT STDMETHODCALLTYPE Test           (BSTR sourceString, VARIANT_BOOL* pMatch)                   = 0;
        
        // Produces a new string in *pDestString from sourceString and replaceString.
        virtual HRESULT STDMETHODCALLTYPE Replace        (BSTR sourceString, BSTR replaceString, LPBSTR pDestString) = 0;
};
//---------------------------------------------------------------------------

// IMatch: a single match result, derived from IDispatch.
// It has the same members as IMatch2 minus get_SubMatches.
// NOTE(review): declaration order defines the vtable layout - do not reorder.
interface IMatch : public IDispatch
{
    public:
        
        // Matched text, returned as a BSTR in *pValue.
        virtual HRESULT STDMETHODCALLTYPE get_Value      (LPBSTR pValue)      = 0;
        // Presumably the offset of the match inside the source string - confirm against the component docs.
        virtual HRESULT STDMETHODCALLTYPE get_FirstIndex (LPLONG pFirstIndex) = 0;
        // Presumably the length of the matched text - confirm against the component docs.
        virtual HRESULT STDMETHODCALLTYPE get_Length     (LPLONG pLength)     = 0;
};
//---------------------------------------------------------------------------

// IMatchCollection: indexed collection of match objects, derived from IDispatch.
// Its member list is identical to IMatchCollection2.
// NOTE(review): declaration order defines the vtable layout - do not reorder.
interface IMatchCollection : public IDispatch
{
    public:

        // Returns the element at position index as an IDispatch in *ppMatch.
        virtual HRESULT STDMETHODCALLTYPE get_Item     (LONG index, LPDISPATCH* ppMatch) = 0;
        // Writes the number of elements into *pCount.
        virtual HRESULT STDMETHODCALLTYPE get_Count    (LPLONG pCount)                   = 0;
        // Returns an enumerator object as an IUnknown in *ppEnum.
        virtual HRESULT STDMETHODCALLTYPE get__NewEnum (LPUNKNOWN* ppEnum)               = 0;
};
//---------------------------------------------------------------------------

// IRegExp2: the regular-expression interface that the RegExp wrapper class
// holds (its m_regexp member points to this type). Derived from IDispatch.
// NOTE(review): calls go through the vtable, so the declaration order presumably
// has to match the component's own interface definition - do not reorder.
interface IRegExp2 : public IDispatch
{
    public:

        // Pattern property: reads into *pPattern / assigns from pPattern.
        virtual HRESULT STDMETHODCALLTYPE get_Pattern    (LPBSTR pPattern)                                              = 0;
        virtual HRESULT STDMETHODCALLTYPE set_Pattern    (BSTR   pPattern)                                              = 0;
        
        // IgnoreCase property (VARIANT_BOOL flag).
        virtual HRESULT STDMETHODCALLTYPE get_IgnoreCase (VARIANT_BOOL* pIgnoreCase)                                    = 0;
        virtual HRESULT STDMETHODCALLTYPE set_IgnoreCase (VARIANT_BOOL  pIgnoreCase)                                    = 0;
        
        // Global property (VARIANT_BOOL flag).
        virtual HRESULT STDMETHODCALLTYPE get_Global     (VARIANT_BOOL* pGlobal)                                        = 0;
        virtual HRESULT STDMETHODCALLTYPE set_Global     (VARIANT_BOOL  pGlobal)                                        = 0;

        // Multiline property (VARIANT_BOOL flag).
        virtual HRESULT STDMETHODCALLTYPE get_Multiline  (VARIANT_BOOL* pMultiline)                                     = 0;
        virtual HRESULT STDMETHODCALLTYPE set_Multiline  (VARIANT_BOOL  pMultiline)                                     = 0;

        // Runs the expression over sourceString; the result is returned as an IDispatch
        // in *ppMatches (the wrapper queries it for IMatchCollection2).
        virtual HRESULT STDMETHODCALLTYPE Execute        (BSTR sourceString, LPDISPATCH* ppMatches)                     = 0;
        
        // Writes a VARIANT_BOOL result for sourceString into *pMatch.
        virtual HRESULT STDMETHODCALLTYPE Test           (BSTR sourceString, VARIANT_BOOL* pMatch)                      = 0;
        
        // Produces a new string in *pDestString; the replacement is passed as a VARIANT
        // (the wrapper passes a VT_BSTR variant).
        virtual HRESULT STDMETHODCALLTYPE Replace        (BSTR sourceString, VARIANT replaceString, LPBSTR pDestString) = 0;
};
//---------------------------------------------------------------------------

// IMatch2: a single match result, derived from IDispatch. The wrapper obtains it
// by querying the items of an IMatchCollection2 for IID_IMatch2.
// NOTE(review): declaration order defines the vtable layout - do not reorder.
interface IMatch2 : public IDispatch
{
    public:
        
        // Matched text, returned as a BSTR in *pValue.
        virtual HRESULT STDMETHODCALLTYPE get_Value      (LPBSTR pValue)            = 0;
        // Presumably the offset of the match inside the source string - confirm against the component docs.
        virtual HRESULT STDMETHODCALLTYPE get_FirstIndex (LPLONG pFirstIndex)       = 0;
        // Presumably the length of the matched text - confirm against the component docs.
        virtual HRESULT STDMETHODCALLTYPE get_Length     (LPLONG pLength)           = 0;
        // Returns the sub-match collection as an IDispatch in *ppSubMatches
        // (the wrapper queries it for ISubMatches).
        virtual HRESULT STDMETHODCALLTYPE get_SubMatches (LPDISPATCH* ppSubMatches) = 0;
};
//---------------------------------------------------------------------------

// IMatchCollection2: indexed collection of match objects, derived from IDispatch.
// The RegExp wrapper keeps the result of the last Execute as a pointer of this type.
// NOTE(review): declaration order defines the vtable layout - do not reorder.
interface IMatchCollection2 : public IDispatch
{
    public:

        // Returns the element at position index as an IDispatch in *ppMatch
        // (the wrapper queries it for IMatch2).
        virtual HRESULT STDMETHODCALLTYPE get_Item     (LONG index, LPDISPATCH* ppMatch) = 0;
        // Writes the number of elements into *pCount.
        virtual HRESULT STDMETHODCALLTYPE get_Count    (LPLONG pCount)                   = 0;
        // Returns an enumerator object as an IUnknown in *ppEnum.
        virtual HRESULT STDMETHODCALLTYPE get__NewEnum (LPUNKNOWN* ppEnum)               = 0;
};
//---------------------------------------------------------------------------

// ISubMatches: indexed collection of the sub-matches of one match, derived from
// IDispatch. Unlike the match collections, items come back as VARIANTs.
// NOTE(review): declaration order defines the vtable layout - do not reorder.
interface ISubMatches : public IDispatch
{
    public:

        // Writes the element at position index into the VARIANT *pSubMatch
        // (the wrapper converts it to VT_BSTR).
        virtual HRESULT STDMETHODCALLTYPE get_Item     (LONG index, LPVARIANT pSubMatch) = 0;
        // Writes the number of elements into *pCount.
        virtual HRESULT STDMETHODCALLTYPE get_Count    (LPLONG pCount)                   = 0;
        // Returns an enumerator object as an IUnknown in *ppEnum.
        virtual HRESULT STDMETHODCALLTYPE get__NewEnum (LPUNKNOWN* ppEnum)               = 0;
};
//---------------------------------------------------------------------------

// RegExp: convenience wrapper around the IRegExp2 COM object.
//
// The object owns two COM references: the expression engine (m_regexp) and the
// match collection produced by the most recent Execute (m_matches). Both are
// released in the destructor. SetPattern, Execute, Test and Replace all discard
// the previously stored match collection before doing their work.
//
// Copying is disabled: the members are raw interface pointers, so a
// compiler-generated copy would share them without AddRef and both destructors
// would Release the same references.
class RegExp
{
    public:

        // Creates the underlying COM object; if that fails every operation
        // below reports failure (false / empty string / 0).
        RegExp  (void);
        ~RegExp (void);

        // Assigns the pattern; returns false if there is no engine or the call fails.
        bool SetPattern (const BSTRString &pattern);
        
        // Runs the current pattern (or the given one) over source and stores the
        // match collection. Returns true only when at least one match was found.
        bool Execute (const BSTRString &source);
        bool Execute (const BSTRString &source, const BSTRString &pattern);

        // Reports whether source matches the current pattern (or the given one).
        // No match collection is kept afterwards.
        bool Test (const BSTRString &source);
        bool Test (const BSTRString &source, const BSTRString &pattern);

        // Returns source with replacements applied, or an empty string on failure.
        BSTRString Replace (const BSTRString &source, const BSTRString &replace);
        BSTRString Replace (const BSTRString &source, const BSTRString &pattern, const BSTRString &replace);

        // Number of matches stored by the last Execute; 0 when nothing is stored.
        DWORD MatchCount (void);

        // Text of the match at dwMatchIndex; empty string if the index is out of
        // range or a COM call fails.
        BSTRString GetMatch (DWORD dwMatchIndex);

        // Number of sub-matches of the match at dwMatchIndex; 0 on any failure.
        DWORD SubMatchCount (DWORD dwMatchIndex);

        // Text of sub-match dwSubMatchIndex of match dwMatchIndex; empty string if
        // either index is out of range or a COM call fails.
        BSTRString GetSubMatch (DWORD dwMatchIndex, DWORD dwSubMatchIndex);

    private:

        // Non-copyable: declared but intentionally left undefined, so that an
        // accidental copy fails at compile time (outside the class) or at link
        // time (inside it) instead of double-releasing the COM pointers.
        RegExp (const RegExp&);
        RegExp& operator= (const RegExp&);

        IRegExp2*          m_regexp;   // expression engine, null if creation failed
        IMatchCollection2* m_matches;  // result of the last Execute, null if none
};
//---------------------------------------------------------------------------



regexp.cpp

#include "regexp.h"
//---------------------------------------------------------------------------
// Identifiers of the regular-expression component: its type library, its
// interfaces (IID_*) and its coclasses (CLSID_*). The wrapper code below uses
// CLSID_RegExp, IID_IRegExp2, IID_IMatchCollection2, IID_IMatch2 and
// IID_ISubMatches; the remaining constants are not referenced in the code shown here.
// The values identify registered COM entities and must not be altered.
const GUID LIBID_VBScript_RegExp_55 = {0x3F4DACA7, 0x160D, 0x11D2, {0xA8, 0xE9, 0x00, 0x10, 0x4B, 0x36, 0x5C, 0x9F}};
const GUID IID_IRegExp              = {0x3F4DACA0, 0x160D, 0x11D2, {0xA8, 0xE9, 0x00, 0x10, 0x4B, 0x36, 0x5C, 0x9F}};
const GUID IID_IMatch               = {0x3F4DACA1, 0x160D, 0x11D2, {0xA8, 0xE9, 0x00, 0x10, 0x4B, 0x36, 0x5C, 0x9F}};
const GUID IID_IMatchCollection     = {0x3F4DACA2, 0x160D, 0x11D2, {0xA8, 0xE9, 0x00, 0x10, 0x4B, 0x36, 0x5C, 0x9F}};
const GUID IID_IRegExp2             = {0x3F4DACB0, 0x160D, 0x11D2, {0xA8, 0xE9, 0x00, 0x10, 0x4B, 0x36, 0x5C, 0x9F}};
const GUID IID_IMatch2              = {0x3F4DACB1, 0x160D, 0x11D2, {0xA8, 0xE9, 0x00, 0x10, 0x4B, 0x36, 0x5C, 0x9F}};
const GUID IID_IMatchCollection2    = {0x3F4DACB2, 0x160D, 0x11D2, {0xA8, 0xE9, 0x00, 0x10, 0x4B, 0x36, 0x5C, 0x9F}};
const GUID IID_ISubMatches          = {0x3F4DACB3, 0x160D, 0x11D2, {0xA8, 0xE9, 0x00, 0x10, 0x4B, 0x36, 0x5C, 0x9F}};
const GUID CLSID_RegExp             = {0x3F4DACA4, 0x160D, 0x11D2, {0xA8, 0xE9, 0x00, 0x10, 0x4B, 0x36, 0x5C, 0x9F}};
const GUID CLSID_Match              = {0x3F4DACA5, 0x160D, 0x11D2, {0xA8, 0xE9, 0x00, 0x10, 0x4B, 0x36, 0x5C, 0x9F}};
const GUID CLSID_MatchCollection    = {0x3F4DACA6, 0x160D, 0x11D2, {0xA8, 0xE9, 0x00, 0x10, 0x4B, 0x36, 0x5C, 0x9F}};
const GUID CLSID_SubMatches         = {0x3F4DACC0, 0x160D, 0x11D2, {0xA8, 0xE9, 0x00, 0x10, 0x4B, 0x36, 0x5C, 0x9F}};
//---------------------------------------------------------------------------

// Instantiates the regular-expression COM object and, when that succeeds,
// switches on case-insensitive, multiline and global matching.
// On failure m_regexp is left null.
RegExp::RegExp (void)
    : m_regexp(0)
    , m_matches(0)
{
    const HRESULT hrCreate = CoCreateInstance(CLSID_RegExp, 0, CLSCTX_ALL, IID_IRegExp2,
                                              reinterpret_cast<void**>(&m_regexp));
    if (FAILED(hrCreate))
        return;

    // Default options; the results of these calls are not checked.
    m_regexp->set_IgnoreCase(VARIANT_TRUE);
    m_regexp->set_Multiline(VARIANT_TRUE);
    m_regexp->set_Global(VARIANT_TRUE);
}
//---------------------------------------------------------------------------

// Drops the COM references held by the wrapper: first the stored match
// collection, then the expression engine.
RegExp::~RegExp (void)
{
    if (m_matches != 0)
        m_matches->Release();
    m_matches = 0;

    if (m_regexp != 0)
        m_regexp->Release();
    m_regexp = 0;
}
//---------------------------------------------------------------------------

bool RegExp::SetPattern (const BSTRString &pattern)
{
    if (!m_regexp)
        return false;

    if (m_matches)
    {
        m_matches->Release();
        m_matches = 0;
    }

    return SUCCEEDED(m_regexp->set_Pattern(pattern.Data()));
}
//---------------------------------------------------------------------------

bool RegExp::Execute (const BSTRString &source)
{
    if (!m_regexp)
        return false;

    if (m_matches)
    {
        m_matches->Release();
        m_matches = 0;
    }

    IDispatch* disp = 0;
    if (FAILED(m_regexp->Execute(source.Data(), &disp)))
        return false;

    if (FAILED(disp->QueryInterface(IID_IMatchCollection2, (void**)&m_matches)))
    {
        disp->Release();
        return false;
    }

    disp->Release();

    return MatchCount() != 0;
}
//---------------------------------------------------------------------------

// Convenience overload: assigns pattern, then executes it over source.
// Execution is skipped when the pattern cannot be assigned.
bool RegExp::Execute (const BSTRString &source, const BSTRString &pattern)
{
    if (!SetPattern(pattern))
        return false;

    return Execute(source);
}
//---------------------------------------------------------------------------

bool RegExp::Test (const BSTRString &source)
{
    if (!m_regexp)
        return false;

    if (m_matches)
    {
        m_matches->Release();
        m_matches = 0;
    }

    VARIANT_BOOL vbResult = VARIANT_FALSE;
    
    if (FAILED(m_regexp->Test(source.Data(), &vbResult)))
        return false;

    return vbResult == VARIANT_TRUE;
}
//---------------------------------------------------------------------------

// Convenience overload: assigns pattern, then tests source against it.
// The test is skipped when the pattern cannot be assigned.
bool RegExp::Test (const BSTRString &source, const BSTRString &pattern)
{
    if (!SetPattern(pattern))
        return false;

    return Test(source);
}
//---------------------------------------------------------------------------

// Number of matches in the stored match collection.
// Returns 0 when no collection is stored or the count cannot be read.
DWORD RegExp::MatchCount (void)
{
    long total = 0;

    if (m_matches == 0 || FAILED(m_matches->get_Count(&total)))
        return 0;

    return static_cast<DWORD>(total);
}
//---------------------------------------------------------------------------

// Returns the text of the match at position dwMatchIndex in the stored
// match collection. An empty string is returned when the index is out of
// range or any COM call fails.
BSTRString RegExp::GetMatch (DWORD dwMatchIndex)
{
    if (dwMatchIndex >= MatchCount())
        return TEXT("");

    IDispatch* item = 0;
    if (FAILED(m_matches->get_Item(dwMatchIndex, &item)))
        return TEXT("");

    // Swap the generic IDispatch for IMatch2; the IDispatch reference is
    // released regardless of the outcome.
    IMatch2* found = 0;
    const HRESULT hrQuery = item->QueryInterface(IID_IMatch2, reinterpret_cast<void**>(&found));
    item->Release();
    if (FAILED(hrQuery))
        return TEXT("");

    BSTR text = 0;
    if (SUCCEEDED(found->get_Value(&text)))
    {
        BSTRString result;
        result.Attach(text);
        found->Release();
        return result;
    }

    found->Release();
    return TEXT("");
}
//---------------------------------------------------------------------------

// Returns how many sub-matches the match at dwMatchIndex has.
// 0 is returned when the index is out of range or any COM call fails.
DWORD RegExp::SubMatchCount (DWORD dwMatchIndex)
{
    if (dwMatchIndex >= MatchCount())
        return 0;

    IDispatch* raw = 0;
    if (FAILED(m_matches->get_Item(dwMatchIndex, &raw)))
        return 0;

    // Step 1: IDispatch -> IMatch2.
    IMatch2* found = 0;
    const HRESULT hrMatch = raw->QueryInterface(IID_IMatch2, reinterpret_cast<void**>(&found));
    raw->Release();
    raw = 0;
    if (FAILED(hrMatch))
        return 0;

    // Step 2: IMatch2 -> sub-match collection (again delivered as IDispatch).
    if (FAILED(found->get_SubMatches(&raw)))
    {
        found->Release();
        return 0;
    }

    // Step 3: IDispatch -> ISubMatches.
    ISubMatches* groups = 0;
    const HRESULT hrGroups = raw->QueryInterface(IID_ISubMatches, reinterpret_cast<void**>(&groups));
    raw->Release();
    if (FAILED(hrGroups))
    {
        found->Release();
        return 0;
    }

    // Both interfaces are released whether or not the count could be read.
    long groupCount = 0;
    const HRESULT hrCount = groups->get_Count(&groupCount);
    groups->Release();
    found->Release();

    return SUCCEEDED(hrCount) ? static_cast<DWORD>(groupCount) : 0;
}
//---------------------------------------------------------------------------

// Returns the text of sub-match dwSubMatchIndex of the match at dwMatchIndex.
// An empty string is returned when either index is out of range, a COM call
// fails, or the sub-match value cannot be converted to a string.
BSTRString RegExp::GetSubMatch (DWORD dwMatchIndex, DWORD dwSubMatchIndex)
{
    if (dwMatchIndex >= MatchCount())
        return TEXT("");

    IDispatch* raw = 0;
    if (FAILED(m_matches->get_Item(dwMatchIndex, &raw)))
        return TEXT("");

    // Step 1: IDispatch -> IMatch2.
    IMatch2* found = 0;
    const HRESULT hrMatch = raw->QueryInterface(IID_IMatch2, reinterpret_cast<void**>(&found));
    raw->Release();
    raw = 0;
    if (FAILED(hrMatch))
        return TEXT("");

    // Step 2: IMatch2 -> sub-match collection (again delivered as IDispatch).
    if (FAILED(found->get_SubMatches(&raw)))
    {
        found->Release();
        return TEXT("");
    }

    // Step 3: IDispatch -> ISubMatches.
    ISubMatches* groups = 0;
    const HRESULT hrGroups = raw->QueryInterface(IID_ISubMatches, reinterpret_cast<void**>(&groups));
    raw->Release();
    if (FAILED(hrGroups))
    {
        found->Release();
        return TEXT("");
    }

    // From here on both interfaces are held; every exit releases the
    // sub-match collection first and the match second.
    long groupCount = 0;
    const bool indexValid = SUCCEEDED(groups->get_Count(&groupCount))
                         && dwSubMatchIndex < static_cast<DWORD>(groupCount);
    if (!indexValid)
    {
        groups->Release();
        found->Release();
        return TEXT("");
    }

    VARIANT value;
    VariantInit(&value);
    if (FAILED(groups->get_Item(dwSubMatchIndex, &value)))
    {
        groups->Release();
        found->Release();
        return TEXT("");
    }

    if (SUCCEEDED(VariantChangeType(&value, &value, 0, VT_BSTR)))
    {
        // The variant is not cleared on this path: its BSTR is handed to the
        // result via Attach (presumably transferring ownership - see BSTRString).
        BSTRString result;
        result.Attach(value.bstrVal);

        groups->Release();
        found->Release();
        return result;
    }

    groups->Release();
    found->Release();
    VariantClear(&value);
    return TEXT("");
}
//---------------------------------------------------------------------------

 // Applies the current pattern to source, substituting replace, and returns
 // the resulting string. Any stored match collection is discarded first.
 // An empty string is returned when there is no engine or the call fails.
 BSTRString RegExp::Replace (const BSTRString &source, const BSTRString &replace)
{
    if (m_regexp == 0)
        return TEXT("");

    if (m_matches != 0)
        m_matches->Release();
    m_matches = 0;

    // The engine takes the replacement as a VARIANT; wrap the caller's BSTR
    // in one. The variant only borrows the string and is never cleared.
    VARIANT replacement;
    VariantInit(&replacement);
    replacement.vt      = VT_BSTR;
    replacement.bstrVal = replace.Data();

    BSTR replaced = 0;
    const HRESULT hrReplace = m_regexp->Replace(source.Data(), replacement, &replaced);
    if (FAILED(hrReplace))
        return TEXT("");

    BSTRString output;
    output.Attach(replaced);
    return output;
}
//---------------------------------------------------------------------------

// Convenience overload: assigns pattern, then performs the replacement.
// An empty string is returned when the pattern cannot be assigned.
BSTRString RegExp::Replace (const BSTRString &source, const BSTRString &pattern, const BSTRString &replace)
{
    if (SetPattern(pattern))
        return Replace(source, replace);

    return TEXT("");
}
//---------------------------------------------------------------------------
... << RSDN@Home 1.1.4 beta 7 rev. 447>>
 
Подождите ...
Wait...
Пока на собственное сообщение не было ответов, его можно удалить.