1
0
mirror of https://github.com/ONLYOFFICE/core.git synced 2025-04-18 14:04:06 +03:00
core/HtmlFile/HtmlFile.cpp
Elena.Subbotina baaf79a5dd .
2023-03-29 10:44:09 +03:00

1498 lines
55 KiB
C++
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
* (c) Copyright Ascensio System SIA 2010-2023
*
* This program is a free software product. You can redistribute it and/or
* modify it under the terms of the GNU Affero General Public License (AGPL)
* version 3 as published by the Free Software Foundation. In accordance with
* Section 7(a) of the GNU AGPL its Section 15 shall be amended to the effect
* that Ascensio System SIA expressly excludes the warranty of non-infringement
* of any third-party rights.
*
* This program is distributed WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. For
* details, see the GNU AGPL at: http://www.gnu.org/licenses/agpl-3.0.html
*
* You can contact Ascensio System SIA at 20A-6 Ernesta Birznieka-Upish
* street, Riga, Latvia, EU, LV-1050.
*
* The interactive user interfaces in modified source and object code versions
* of the Program must display Appropriate Legal Notices, as required under
* Section 5 of the GNU AGPL version 3.
*
* Pursuant to Section 7(b) of the License you must retain the original Product
* logo when distributing the program. Pursuant to Section 7(e) we decline to
* grant you any rights under trademark law for use of our trademarks.
*
* All the Product's GUI elements, including illustrations and icon sets, as
* well as technical writing content are licensed under the terms of the
* Creative Commons Attribution-ShareAlike 4.0 International. See the License
* terms at http://creativecommons.org/licenses/by-sa/4.0/legalcode
*
*/
#ifdef WIN32
#include <windows.h>
#endif
#include "HtmlFile.h"
#include "../DesktopEditor/common/File.h"
#include "../DesktopEditor/common/Directory.h"
#include "../DesktopEditor/common/StringBuilder.h"
#include "../DesktopEditor/common/StringExt.h"
#include "../DesktopEditor/xml/include/xmlutils.h"
#include "../DesktopEditor/fontengine/application_generate_fonts_common.h"
#include <vector>
#include <map>
#ifdef LINUX
#include <unistd.h>
#include <sys/wait.h>
#include <stdio.h>
extern char** environ;

// Copies `s` into a freshly new[]-allocated, NUL-terminated buffer.
// The buffer is meant to be released with delete[] (see linux_environ_clear).
static char* linux_environ_dup(const std::string& s)
{
	char* buffer = new char[s.length() + 1];
	s.copy(buffer, s.length());
	buffer[s.length()] = '\0';
	return buffer;
}

// Builds a NULL-terminated copy of the current process environment in which
// `str_library_path` is appended to LD_LIBRARY_PATH (the variable is added
// when it is not present yet).
//
// The returned array and every string in it are allocated with new[];
// release them with linux_environ_clear().
char** linux_environ_get(const std::string& str_library_path = "")
{
	// Match the whole variable name including '=' so that unrelated variables
	// sharing the prefix (e.g. LD_LIBRARY_PATH_EXTRA) are left untouched.
	static const std::string kVarPrefix = "LD_LIBRARY_PATH=";

	int count = 0;
	while (environ != NULL && environ[count] != NULL)
		++count;

	// One spare slot for a possibly added LD_LIBRARY_PATH plus the terminator.
	const int capacity = count + 2;
	char** env = new char*[capacity];
	for (int i = 0; i < capacity; ++i)
		env[i] = NULL;

	bool is_ld_library_path = false;
	for (int i = 0; i < count; ++i)
	{
		std::string s = environ[i];
		if (0 == s.compare(0, kVarPrefix.length(), kVarPrefix))
		{
			if (!str_library_path.empty())
			{
				// Avoid a leading ':' (an empty search-path entry) when the
				// existing value is empty.
				if (s.length() > kVarPrefix.length())
					s += ":";
				s += str_library_path;
			}
			is_ld_library_path = true;
		}
		env[i] = linux_environ_dup(s);
	}

	if (!is_ld_library_path)
		env[count] = linux_environ_dup(kVarPrefix + str_library_path);

	return env;
}
// Releases an environment array produced by linux_environ_get():
// every string up to the NULL terminator, then the array itself.
// A NULL array is accepted and ignored.
void linux_environ_clear(char** data)
{
	if (NULL == data)
		return;

	for (char** entry = data; *entry != NULL; ++entry)
		delete [] *entry;

	delete [] data;
}
// Debug helper: appends every entry of the NULL-terminated array `env`
// (one per line) to "env.log" in the process directory.
// Does nothing when `env` is NULL or the log file cannot be opened.
void linux_environ_print(char** env)
{
	if (NULL == env)
		return;

	std::wstring file = NSFile::GetProcessDirectory() + L"/env.log";
	std::string fileA = U_TO_UTF8(file);

	// Open the log once instead of once per variable.
	FILE* f = fopen(fileA.c_str(), "a+");
	if (NULL == f)
		return;

	for (int i = 0; env[i] != NULL; i++)
	{
		// fputs writes the text verbatim, so '%' needs no escaping and the
		// environment data is never interpreted as a printf format string.
		fputs(env[i], f);
		fputs("\n", f);
	}
	fclose(f);
}
#endif
class CHtmlFile_Private
{
public:
bool m_bIsEpub;
std::wstring m_sHtmlFileInternal;
public:
CHtmlFile_Private()
{
m_bIsEpub = false;
m_sHtmlFileInternal = L"";
std::wstring sProcessPath = NSFile::GetProcessDirectory();
std::wstring sPathConfig = sProcessPath + L"/DoctRenderer.config";
if (NSFile::CFileBinary::Exists(sPathConfig))
{
XmlUtils::CXmlNode oNode;
if (oNode.FromXmlFile(sPathConfig))
{
std::wstring sPath = oNode.ReadValueString(L"htmlfileinternal");
if (!sPath.empty())
{
if (0 == sPath.find(L"./"))
sPath = sProcessPath + sPath.substr(1);
m_sHtmlFileInternal = sPath;
}
}
}
}
};
// Creates the private implementation object; its constructor reads the
// optional "htmlfileinternal" setting from DoctRenderer.config.
CHtmlFile::CHtmlFile()
{
m_internal = new CHtmlFile_Private();
}
// Releases the private implementation object allocated in the constructor.
// NOTE(review): RELEASEOBJECT is a project macro defined outside this file -
// presumably it deletes the pointer and resets it; confirm.
CHtmlFile::~CHtmlFile()
{
RELEASEOBJECT(m_internal);
}
static std::wstring GetSdkPath()
{
std::wstring sProcess = NSFile::GetProcessDirectory() + L"/";
std::wstring sPathConfig = sProcess + L"DoctRenderer.config";
XmlUtils::CXmlNode oNode;
if (!oNode.FromXmlFile(sPathConfig))
return L"";
std::wstring sPath = oNode.ReadValueString(L"DoctSdk");
if (NSFile::CFileBinary::Exists(sPath) && !NSFile::CFileBinary::Exists(sProcess + sPath))
return sPath;
return sProcess + sPath;
}
// Converts a file-system path into a URL string:
//  - backslashes are replaced with forward slashes;
//  - a value that already contains "://" is returned as is;
//  - "//host/share"  -> "file://host/share"
//  - "/abs/path"     -> "file:///abs/path"
//  - anything else   -> "file:///" + path (e.g. "C:/dir/file")
static std::wstring CorrectHtmlPath(const std::wstring& sPath)
{
	std::wstring sReturn = sPath;
	NSStringExt::Replace(sReturn, L"\\", L"/");

	if (std::wstring::npos != sReturn.find(L"://"))
		return sReturn;

	if (0 == sReturn.find(L"//"))
		return L"file:" + sReturn;

	// Test the normalized string: a path that started with a single
	// backslash begins with '/' now and must not get a fourth slash.
	if (!sReturn.empty() && sReturn[0] == L'/')
		return L"file://" + sReturn;

	return L"file:///" + sReturn;
}
static bool IsLinuxXVFB()
{
std::wstring sProcess = NSFile::GetProcessDirectory() + L"/";
std::wstring sPathConfig = sProcess + L"DoctRenderer.config";
XmlUtils::CXmlNode oNode;
if (oNode.FromXmlFile(sPathConfig))
{
std::vector<XmlUtils::CXmlNode> oNodes;
if (oNode.GetNodes(L"htmlnoxvfb", oNodes))
{
if (oNodes.size() == 1)
return false;
}
}
else if (oNode.FromXmlFile(sProcess + L"converter/DoctRenderer.config"))
{
std::vector<XmlUtils::CXmlNode> oNodes;
if (oNode.GetNodes(L"htmlnoxvfb", oNodes))
{
if (oNodes.size() == 1)
return false;
}
}
return true;
}
// Appends one <sdk>URL</sdk> element to `oBuilder` for every script listed in
// DoctRenderer.config (process directory):
//  - top-level "file" nodes, except those whose URL contains "/Native/";
//  - top-level "htmlfile" nodes;
//  - "file" children of the "DoctSdk" node or, when there are none, the text
//    of the "DoctSdk" node itself.
// Nothing is written when the config cannot be loaded.
static void GetScriptsPath(NSStringUtils::CStringBuilder& oBuilder)
{
	const std::wstring sProcess = NSFile::GetProcessDirectory() + L"/";

	XmlUtils::CXmlNode oNode;
	if (!oNode.FromXmlFile(sProcess + L"DoctRenderer.config"))
		return;

	// A configured path is used as given only when it exists as given and
	// does not exist relative to the process directory; otherwise it is
	// resolved against the process directory. The result is returned as URL.
	auto resolveUrl = [&sProcess](const std::wstring& sConfigured) -> std::wstring
	{
		std::wstring sFile = sConfigured;
		if (!NSFile::CFileBinary::Exists(sFile) || NSFile::CFileBinary::Exists(sProcess + sFile))
			sFile = sProcess + sFile;
		return CorrectHtmlPath(sFile);
	};

	auto writeSdk = [&oBuilder](const std::wstring& sUrl)
	{
		oBuilder.WriteString(L"<sdk>");
		oBuilder.WriteEncodeXmlString(sUrl);
		oBuilder.WriteString(L"</sdk>");
	};

	std::vector<XmlUtils::CXmlNode> oNodesFiles;
	if (oNode.GetNodes(L"file", oNodesFiles))
	{
		for (size_t i = 0; i < oNodesFiles.size(); ++i)
		{
			const std::wstring sUrl = resolveUrl(oNodesFiles[i].GetText());
			// scripts located under "/Native/" are not passed on
			if (std::wstring::npos == sUrl.find(L"/Native/"))
				writeSdk(sUrl);
		}
	}

	std::vector<XmlUtils::CXmlNode> oNodesHtmlFiles;
	if (oNode.GetNodes(L"htmlfile", oNodesHtmlFiles))
	{
		for (size_t i = 0; i < oNodesHtmlFiles.size(); ++i)
			writeSdk(resolveUrl(oNodesHtmlFiles[i].GetText()));
	}

	XmlUtils::CXmlNode oNodeSdk = oNode.ReadNode(L"DoctSdk");
	std::vector<XmlUtils::CXmlNode> oNodes;
	if (oNodeSdk.GetNodes(L"file", oNodes))
	{
		for (size_t i = 0; i < oNodes.size(); ++i)
			writeSdk(resolveUrl(oNodes[i].GetText()));
	}
	else
	{
		writeSdk(resolveUrl(oNodeSdk.GetText()));
	}
}
int CHtmlFile::Convert(const std::vector<std::wstring>& arFiles, const std::wstring& sDstfolder)
{
std::wstring sInternal = m_internal->m_sHtmlFileInternal;
if (sInternal.empty())
sInternal = NSFile::GetProcessDirectory() + L"/HtmlFileInternal/";
sInternal += L"HtmlFileInternal";
#ifdef WIN32
sInternal += L".exe";
#endif
int nReturnCode = 0;
NSStringUtils::CStringBuilder oBuilder;
oBuilder.WriteString(L"<html>");
GetScriptsPath(oBuilder);
// destination
oBuilder.WriteString(L"<destination>");
std::wstring sDstOut = sDstfolder;
NSStringExt::Replace(sDstOut, L"\\", L"/");
oBuilder.WriteEncodeXmlString(sDstOut);
if (!sDstOut.empty())
{
wchar_t _c = sDstOut.c_str()[sDstOut.length() - 1];
if (_c != '/')
oBuilder.AddCharSafe('/');
}
oBuilder.WriteString(L"</destination>");
std::vector<std::wstring> arTmpFiles;
for (std::vector<std::wstring>::const_iterator iter = arFiles.begin(); iter != arFiles.end(); iter++)
{
oBuilder.WriteString(L"<file>");
std::wstring sFilePath = *iter;
std::wstring sExt = NSCommon::GetFileExtention(sFilePath);
NSCommon::makeUpperW(sExt);
if (sExt == L"HTML" || sExt == L"HTM" || sExt == L"XHTML")
oBuilder.WriteEncodeXmlString(CorrectHtmlPath(sFilePath));
else
{
std::wstring sTmpDir = L"";
if (m_internal->m_bIsEpub)
{
// чтобы ссылки на картинки остались
sTmpDir = NSFile::GetDirectoryName(sFilePath);
if (!NSDirectory::Exists(sTmpDir))
sTmpDir = NSDirectory::GetTempPath();
}
else
{
sTmpDir = NSDirectory::GetTempPath();
}
std::wstring sTmpFile = NSFile::CFileBinary::CreateTempFileWithUniqueName(sTmpDir, L"HTM");
if (NSFile::CFileBinary::Exists(sTmpFile))
NSFile::CFileBinary::Remove(sTmpFile);
sTmpFile = sTmpFile + L".html";
NSFile::CFileBinary::Copy(sFilePath, sTmpFile);
oBuilder.WriteEncodeXmlString(CorrectHtmlPath(sTmpFile));
arTmpFiles.push_back(sTmpFile);
}
oBuilder.WriteString(L"</file>");
}
oBuilder.WriteString(L"</html>");
#ifdef WIN32
STARTUPINFO sturtupinfo;
ZeroMemory(&sturtupinfo,sizeof(STARTUPINFO));
sturtupinfo.cb = sizeof(STARTUPINFO);
std::wstring sTempFileForParams = NSFile::CFileBinary::CreateTempFileWithUniqueName(NSFile::CFileBinary::GetTempPath(), L"XML");
NSFile::CFileBinary oFile;
oFile.CreateFileW(sTempFileForParams);
oFile.WriteStringUTF8(oBuilder.GetData(), true);
oFile.CloseFile();
std::wstring sApp = L"HtmlFileInternal <html>" + sTempFileForParams;
wchar_t* pCommandLine = NULL;
if (true)
{
pCommandLine = new wchar_t[sApp.length() + 1];
memcpy(pCommandLine, sApp.c_str(), sApp.length() * sizeof(wchar_t));
pCommandLine[sApp.length()] = (wchar_t)'\0';
}
#if 0
std::wstringstream ss;
ss << L"PATH=" << _wgetenv(L"PATH");
ss << L";" << NSFile::GetProcessDirectory();
ss << L";" << (NSFile::GetProcessDirectory() + L"\\..");
ss << L";" << (NSFile::GetProcessDirectory() + L"\\..\\..");
std::wstring env = ss.str();
wchar_t* pCommandLineEnv = NULL;
if (true)
{
pCommandLineEnv = new wchar_t[env.length() + 2];
memcpy(pCommandLineEnv, env.c_str(), sApp.length() * sizeof(wchar_t));
pCommandLineEnv[env.length()] = (wchar_t)'\0';
pCommandLineEnv[env.length() + 1] = (wchar_t)'\0';
}
PROCESS_INFORMATION processinfo;
ZeroMemory(&processinfo,sizeof(PROCESS_INFORMATION));
BOOL bResult = CreateProcessW(sInternal.c_str(), pCommandLine,
NULL, NULL, TRUE, CREATE_UNICODE_ENVIRONMENT, (LPVOID)pCommandLineEnv, NULL, &sturtupinfo, &processinfo);
#else
HANDLE ghJob = CreateJobObject(NULL, NULL);
if (ghJob)
{
JOBOBJECT_EXTENDED_LIMIT_INFORMATION jeli = { 0 };
// Configure all child processes associated with the job to terminate when the
jeli.BasicLimitInformation.LimitFlags = JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE;
if ( 0 == SetInformationJobObject( ghJob, JobObjectExtendedLimitInformation, &jeli, sizeof(jeli)))
{
CloseHandle(ghJob);
ghJob = NULL;
}
}
PROCESS_INFORMATION processinfo;
ZeroMemory(&processinfo,sizeof(PROCESS_INFORMATION));
BOOL bResult = CreateProcessW(sInternal.c_str(), pCommandLine,
NULL, NULL, TRUE, NULL, NULL, NULL, &sturtupinfo, &processinfo);
if (bResult && ghJob)
{
AssignProcessToJobObject(ghJob, processinfo.hProcess);
}
#endif
::WaitForSingleObject(processinfo.hProcess, INFINITE);
RELEASEARRAYOBJECTS(pCommandLine);
//get exit code
DWORD dwExitCode = 0;
if (GetExitCodeProcess(processinfo.hProcess, &dwExitCode))
{
nReturnCode = (int)dwExitCode;
}
CloseHandle(processinfo.hProcess);
CloseHandle(processinfo.hThread);
NSFile::CFileBinary::Remove(sTempFileForParams);
#endif
#ifdef LINUX
std::wstring sTempFileForParams = NSFile::CFileBinary::CreateTempFileWithUniqueName(NSFile::CFileBinary::GetTempPath(), L"XML");
NSFile::CFileBinary oFile;
oFile.CreateFileW(sTempFileForParams);
oFile.WriteStringUTF8(oBuilder.GetData(), true);
oFile.CloseFile();
pid_t pid = fork(); // create child process
int status;
std::string sProgramm = U_TO_UTF8(sInternal);
std::string sXmlA = "<html>" + U_TO_UTF8(sTempFileForParams);
switch (pid)
{
case -1: // error
break;
case 0: // child process
{
std::string sLibraryDir = sProgramm;
std::string::size_type posLast = sProgramm.find_last_of('/');
std::string sProgrammDir = sProgramm.substr(0, posLast);
if (std::string::npos != posLast)
sLibraryDir = sProgrammDir + ":" + sProgrammDir + "/../:" + sProgrammDir + "/converter/";
if (!IsLinuxXVFB())
{
sLibraryDir = "LD_LIBRARY_PATH=" + sLibraryDir;
const char* nargs[2];
nargs[0] = sXmlA.c_str();
nargs[1] = NULL;
const char* nenv[4];
nenv[0] = sLibraryDir.c_str();
nenv[1] = "DISPLAY=:0";
nenv[2] = NULL;
nenv[3] = NULL;
execve(sProgramm.c_str(),
(char * const *)nargs,
(char * const *)nenv);
exit(EXIT_SUCCESS);
}
else
{
const char* nargs[6];
nargs[0] = "-a";
nargs[1] = "--auto-servernum";
nargs[2] = "--server-num=1";
nargs[3] = sProgramm.c_str();
nargs[4] = sXmlA.c_str();
nargs[5] = NULL;
/*
const char* nenv[4];
nenv[0] = sLibraryDir.c_str();
nenv[1] = NULL;//"DISPLAY=:99";
nenv[2] = NULL;
*/
char** env = linux_environ_get(sLibraryDir);
//linux_environ_print(env);
execve("/usr/bin/xvfb-run", (char * const *)nargs, (char * const *)env);
linux_environ_clear(env);
exit(EXIT_SUCCESS);
}
break;
}
default: // parent process, pid now contains the child pid
while (-1 == waitpid(pid, &status, 0)); // wait for child to complete
if (WIFEXITED(status))
{
nReturnCode = WEXITSTATUS(status);
}
break;
}
#endif
for (std::vector<std::wstring>::iterator i = arTmpFiles.begin(); i != arTmpFiles.end(); i++)
{
NSFile::CFileBinary::Remove(*i);
}
arTmpFiles.clear();
NSFile::CFileBinary::Remove(sTempFileForParams);
return nReturnCode;
}
/////////////////////////////////////////////////////////////////
// EPUB
/////////////////////////////////////////////////////////////////
static std::vector<std::wstring> ParseEpub(const std::wstring& sPackagePath, std::wstring& sMetaInfo)
{
std::vector<std::wstring> arHtmls;
XmlUtils::CXmlNode oNodeRoot;
if (!oNodeRoot.FromXmlFile(sPackagePath))
return arHtmls;
XmlUtils::CXmlNode oNodeMeta = oNodeRoot.ReadNodeNoNS(L"metadata");
if (oNodeMeta.IsValid())
{
NSStringUtils::CStringBuilder oBuilder;
std::wstring sTitle = oNodeMeta.ReadValueString(L"dc:title");
std::wstring sCreator = oNodeMeta.ReadValueString(L"dc:creator");
std::wstring sPublisher = oNodeMeta.ReadValueString(L"dc:publisher");
std::wstring sLanguage = oNodeMeta.ReadValueString(L"dc:language");
std::wstring sContributor = oNodeMeta.ReadValueString(L"dc:contributor");
std::wstring sDescription = oNodeMeta.ReadValueString(L"dc:description");
std::wstring sCoverage = oNodeMeta.ReadValueString(L"dc:coverage");
std::vector<XmlUtils::CXmlNode> oMetaNodes = oNodeMeta.ReadNodesNoNS(L"meta");
if (oMetaNodes.IsValid())
{
size_t nCountMeta = oMetaNodes.size();
for (size_t i = 0; i < nCountMeta; ++i)
{
XmlUtils::CXmlNode &oNodeTmp = oMetaNodes[i];
std::wstring sName = oNodeTmp.GetAttribute(L"name");
if (sName == L"cover")
sCoverage = L"1";
}
}
if (!sTitle.empty())
{
oBuilder.WriteString(L"<name>");
oBuilder.WriteEncodeXmlString(sTitle.c_str(), (int)sTitle.length());
oBuilder.WriteString(L"</name>");
}
if (!sCreator.empty())
{
oBuilder.WriteString(L"<author>");
oBuilder.WriteEncodeXmlString(sCreator.c_str(), (int)sCreator.length());
oBuilder.WriteString(L"</author>");
oBuilder.WriteString(L"<creator>");
oBuilder.WriteEncodeXmlString(sCreator.c_str(), (int)sCreator.length());
oBuilder.WriteString(L"</creator>");
}
if (!sPublisher.empty())
{
oBuilder.WriteString(L"<publisher>");
oBuilder.WriteEncodeXmlString(sPublisher.c_str(), (int)sPublisher.length());
oBuilder.WriteString(L"</publisher>");
}
if (!sLanguage.empty())
{
oBuilder.WriteString(L"<language>");
oBuilder.WriteEncodeXmlString(sLanguage.c_str(), (int)sLanguage.length());
oBuilder.WriteString(L"</language>");
}
if (!sContributor.empty())
{
oBuilder.WriteString(L"<creator>");
oBuilder.WriteEncodeXmlString(sContributor.c_str(), (int)sContributor.length());
oBuilder.WriteString(L"</creator>");
}
if (!sDescription.empty())
{
oBuilder.WriteString(L"<annotation>");
oBuilder.WriteEncodeXmlString(sDescription.c_str(), (int)sDescription.length());
oBuilder.WriteString(L"</annotation>");
}
if (!sCoverage.empty())
{
oBuilder.WriteString(L"<coverpage>1</coverpage>");
}
if (0 != oBuilder.GetCurSize())
sMetaInfo = L"<meta>" + oBuilder.GetData() + L"</meta>";
}
XmlUtils::CXmlNode oNodeSpine = oNodeRoot.ReadNodeNoNS(L"spine");
if (!oNodeRoot.IsValid())
return arHtmls;
std::vector<XmlUtils::CXmlNode> oNodesItemRef = oNodeSpine.ReadNodesNoNS(L"itemref");
if (!oNodeSpine.IsValid())
return arHtmls;
std::vector<std::wstring> sIds;
size_t nCountRefs = oNodesItemRef.size();
for (size_t i = 0; i < nCountRefs; ++i)
{
XmlUtils::CXmlNode &oNodeTmp = oNodesItemRef[i];
std::wstring sId = oNodeTmp.GetAttribute(L"idref");
if (!sId.empty())
sIds.push_back(sId);
}
if (0 == sIds.size())
return arHtmls;
XmlUtils::CXmlNode oNodeManifest = oNodeRoot.ReadNodeNoNS(L"manifest");
if (!oNodeRoot.IsValid())
return arHtmls;
std::vector<XmlUtils::CXmlNode> oNodesItems = oNodeManifest.ReadNodesNoNS(L"item");
if (!oNodeManifest.IsValid())
return arHtmls;
size_t pos = sPackagePath.find_last_of((wchar_t)'/');
std::wstring sPackagePathDir = sPackagePath;
if (std::wstring::npos != pos)
sPackagePathDir = sPackagePath.substr(0, pos + 1);
std::map<std::wstring, std::wstring> mapHtmls;
size_t nCountItems = oNodesItems.size();
for (size_t i = 0; i < nCountItems; ++i)
{
XmlUtils::CXmlNode &oNodeTmp = oNodesItems[i];
std::wstring sMime = oNodeTmp.GetAttribute(L"media-type");
std::wstring sHRef = oNodeTmp.GetAttribute(L"href");
#if 0
//Decode URL
sHRef.Replace(_T("%20"), _T(" "));
sHRef.Replace(_T("%3B"), _T(";"));
sHRef.Replace(_T("%2C"), _T(","));
sHRef.Replace(_T("%26"), _T("&"));
sHRef.Replace(_T("%3D"), _T("="));
sHRef.Replace(_T("%2B"), _T("+"));
sHRef.Replace(_T("%24"), _T("$"));
#endif
std::wstring sId = oNodeTmp.GetAttribute(L"id");
if (!sMime.empty() && !sHRef.empty())
mapHtmls.insert(std::pair<std::wstring, std::wstring>(sId, sPackagePathDir + sHRef));
}
for (std::vector<std::wstring>::iterator iter = sIds.begin(); iter != sIds.end(); iter++)
{
std::map<std::wstring, std::wstring>::const_iterator i = mapHtmls.find(*iter);
if (i != mapHtmls.end())
{
arHtmls.push_back(i->second);
}
}
return arHtmls;
}
int CHtmlFile::ConvertEpub(const std::wstring& sFolder, std::wstring& sMetaInfo, const std::wstring& sDstfolder)
{
std::wstring sFolderWithSlash = sFolder;
NSStringExt::Replace(sFolderWithSlash, L"\\", L"/");
if (!sFolderWithSlash.empty())
{
wchar_t c = sFolderWithSlash.c_str()[sFolderWithSlash.length() - 1];
if (c != '/' && c != '\\')
sFolderWithSlash += L"/";
}
std::wstring sMimeType = L"";
if (!NSFile::CFileBinary::ReadAllTextUtf8(sFolderWithSlash + L"mimetype", sMimeType))
return 1;
std::wstring::size_type findMime = sMimeType.find(L"application/epub+zip");
if (findMime == std::wstring::npos || findMime > 10) // 10 - просто число. Заглушка под мега епабы
return 1;
std::wstring sContainer = sFolderWithSlash + L"META-INF/container.xml";
XmlUtils::CXmlNode oNodeContainer;
if (!oNodeContainer.FromXmlFile(sContainer))
return 1;
XmlUtils::CXmlNode oNodeRootFiles = oNodeContainer.ReadNodeNoNS(L"rootfiles");
if (!oNodeRootFiles.IsValid())
return 1;
std::wstring sPackagePathXml;
std::vector<XmlUtils::CXmlNode> oNodesRootFile = oNodeRootFiles.ReadNodesNoNS(L"rootfile");
if (!oNodeRootFiles.IsValid())
return 1;
size_t nCount = oNodesRootFile.size();
for (size_t i = 0; i < nCount; ++i)
{
XmlUtils::CXmlNode &oNodeRF = oNodesRootFile[i];
std::wstring sMime = oNodeRF.GetAttribute(L"media-type");
std::wstring sPackagePath = oNodeRF.GetAttribute(L"full-path");
if (!sPackagePath.empty() && L"application/oebps-package+xml" == sMime)
sPackagePathXml = sFolderWithSlash + sPackagePath;
}
if (sPackagePathXml.empty())
return 1;
std::vector<std::wstring> arHtmls = ParseEpub(sPackagePathXml, sMetaInfo);
if (arHtmls.size() == 0)
return 1;
m_internal->m_bIsEpub = true;
int nErr = this->Convert(arHtmls, sDstfolder);
m_internal->m_bIsEpub = false;
return nErr;
}
/////////////////////////////////////////////////////////////////
// MHT
/////////////////////////////////////////////////////////////////
#include <list>
#include <algorithm>
#include "../UnicodeConverter/UnicodeConverter.h"
namespace NSMht
{
// Locale-independent lowercase: maps 'A'..'Z' to 'a'..'z' and returns every
// other character unchanged.
char easytolower(char in)
{
	const bool bUpper = ('A' <= in) && (in <= 'Z');
	return bUpper ? static_cast<char>(in + ('z' - 'Z')) : in;
}
// Wide-character variant of easytolower: only L'A'..L'Z' are converted,
// every other character is returned unchanged.
wchar_t easytolower_w(wchar_t in)
{
	const bool bUpper = ('A' <= in) && (in <= 'Z');
	return bUpper ? static_cast<wchar_t>(in + ('z' - 'Z')) : in;
}
// String constants used while parsing MHT parts.
// Header names and parameters are lowercase; they are searched for in
// lowercased copies of the header lines.
namespace Names
{
	// header fields / parameters
	const std::string boundary_str{"boundary="};
	const std::string contentType_str{"content-type:"};
	const std::string contentTransferEncoding_str{"content-transfer-encoding:"};
	const std::string contentLocation_str{"content-location:"};
	const std::string contentCharset_str{"charset="};
	const std::string contentID_str{"content-id:"};

	// content types (or content type prefixes)
	const std::string htmlFileType{"text/html"};
	const std::string xmlFileType{"text/xml"};
	const std::string cssFileType{"text/css"};
	const std::string imageFileType{"image/"};
	const std::string jsFileType{"application/x-javascript"};

	// transfer encodings
	const std::string code_7bit{"7bit"};
	const std::string code_8bit{"8bit"};
	const std::string code_QuotedPrintable{"quoted-printable"};
	const std::string code_Base64{"base64"};
}
// Replaces every occurrence of `replaceFrom` in `text` with `replaceTo`.
// The search continues after the inserted text, so a replacement that
// contains the search string is not processed again.
// An empty `replaceFrom` leaves `text` unchanged (it would otherwise match at
// every position and never terminate).
void string_replace(std::wstring& text, const std::wstring& replaceFrom, const std::wstring& replaceTo)
{
	if (replaceFrom.empty())
		return;

	size_t posn = 0;
	while (std::wstring::npos != (posn = text.find(replaceFrom, posn)))
	{
		text.replace(posn, replaceFrom.length(), replaceTo);
		posn += replaceTo.length();
	}
}
// One part (inner file) of an MHT document.
class CInnerFile
{
public:
	std::string m_sContentType;      // value of the Content-Type header
	std::wstring m_sContentLocation; // value of the Content-Location header
	std::wstring m_sContentID;       // value of the Content-ID header
	std::string m_sEncoding;         // charset of the part (may be empty)
	std::string m_sContentEncoding;  // value of Content-Transfer-Encoding
	std::string m_sData;             // body of the part
	std::wstring m_sDstFilePath;     // file the part is saved to

public:
	// Charset used for textual data: the declared charset or, when none is
	// declared, "US-ASCII" for 7bit parts and "latin1" for everything else.
	std::string GetTextEncoding() const
	{
		if (!m_sEncoding.empty())
			return m_sEncoding;
		if (m_sContentEncoding.find(Names::code_7bit) != std::string::npos)
			return "US-ASCII";
		return "latin1";
	}

	// Writes the part to m_sDstFilePath.
	//
	// HTML/XML/JS and CSS parts are decoded, references to other parts are
	// rewritten to the local file names given in `sMap` (original location ->
	// local relative path) and the text is written back in its original
	// charset. Other parts are written as decoded base64 bytes or as text.
	void Save(const std::map<std::wstring, std::wstring>& sMap, NSUnicodeConverter::CUnicodeConverter* pUnicodeConverter)
	{
		const bool bIsCSS = (m_sContentType.find(Names::cssFileType) != std::string::npos);
		const bool bIsHtml = (m_sContentType.find(Names::htmlFileType) != std::string::npos ||
							  m_sContentType.find(Names::xmlFileType) != std::string::npos ||
							  m_sContentType.find(Names::jsFileType) != std::string::npos);
		const bool bIsBase64 = (m_sContentEncoding.find(Names::code_Base64) != std::string::npos);

		if (bIsCSS || bIsHtml)
		{
			if (bIsBase64)
			{
				BYTE* pData = NULL;
				int nLen = 0;
				NSFile::CBase64Converter::Decode(m_sData.c_str(), m_sData.length(), pData, nLen);
				m_sData = (NULL != pData && nLen > 0) ? std::string((char*)pData, nLen) : std::string();
				RELEASEARRAYOBJECTS(pData);
			}

			const std::string sEnc = GetTextEncoding();
			std::wstring sRes = pUnicodeConverter->toUnicode(m_sData, sEnc.c_str());

			// rewrite the references, then convert back to the original charset
			for (std::map<std::wstring, std::wstring>::const_iterator itUrl = sMap.begin(); itUrl != sMap.end(); ++itUrl)
			{
				const std::wstring& sOrigUrl = itUrl->first;

				// All spellings under which this part may be referenced.
				// Empty spellings are never added: searching for them would
				// rewrite every empty attribute value ("") or url().
				std::list<std::wstring> listReplace;
				if (!sOrigUrl.empty())
					listReplace.push_back(sOrigUrl);

				// relative to the root of this document's location
				{
					std::wstring::size_type pos = m_sContentLocation.find(L"//");
					std::wstring::size_type start = 0;
					if (pos != std::wstring::npos)
						start = pos + 3; // '///'

					pos = m_sContentLocation.find('/', start);
					if (pos != std::wstring::npos)
					{
						std::wstring sMain = m_sContentLocation.substr(0, pos);
						if (0 == sOrigUrl.find(sMain))
						{
							std::wstring sCandidate = sOrigUrl.substr(sMain.length());
							if (!sCandidate.empty())
								listReplace.push_back(sCandidate);
						}
					}
				}

				// relative to the folder of this document
				{
					std::wstring::size_type pos = m_sContentLocation.rfind('/');
					if (pos != std::wstring::npos)
					{
						std::wstring sUrl = m_sContentLocation.substr(0, pos + 1);
						if (0 == sOrigUrl.find(sUrl))
						{
							std::wstring sCandidate = sOrigUrl.substr(sUrl.length());
							if (!sCandidate.empty())
							{
								listReplace.push_back(sCandidate);
								listReplace.push_back(L"./" + sCandidate);
							}
						}
					}
				}

				if (bIsHtml)
				{
					// quoted attribute values: "x" and 'x'
					const std::wstring sReplace = L"\"" + itUrl->second + L"\"";
					for (std::list<std::wstring>::iterator itFind = listReplace.begin(); itFind != listReplace.end(); ++itFind)
					{
						string_replace(sRes, L"\"" + *itFind + L"\"", sReplace);
						string_replace(sRes, L"'" + *itFind + L"'", sReplace);
					}
				}
				else
				{
					// css: url(x), url('x') and url("x")
					const std::wstring sReplace = L"url(" + itUrl->second + L")";
					for (std::list<std::wstring>::iterator itFind = listReplace.begin(); itFind != listReplace.end(); ++itFind)
					{
						string_replace(sRes, L"url(" + *itFind + L")", sReplace);
						string_replace(sRes, L"url('" + *itFind + L"')", sReplace);
						string_replace(sRes, L"url(\"" + *itFind + L"\")", sReplace);
					}
				}
			}

			std::string sResA = pUnicodeConverter->fromUnicode(sRes, sEnc.c_str());

			NSFile::CFileBinary oDstFile;
			if (oDstFile.CreateFileW(m_sDstFilePath))
			{
				oDstFile.WriteFile((BYTE*)sResA.c_str(), (DWORD)sResA.length());
			}
			oDstFile.CloseFile();
		}
		else if (bIsBase64)
		{
			// binary part: write the decoded bytes as they are
			BYTE* pData = NULL;
			int nLen = 0;
			NSFile::CBase64Converter::Decode(m_sData.c_str(), m_sData.length(), pData, nLen);

			NSFile::CFileBinary oFile;
			if (oFile.CreateFileW(m_sDstFilePath))
			{
				oFile.WriteFile(pData, nLen);
			}
			oFile.CloseFile();

			RELEASEARRAYOBJECTS(pData);
		}
		else
		{
			const std::string sEnc = GetTextEncoding();
			std::wstring sRes = pUnicodeConverter->toUnicode(m_sData, sEnc.c_str());
			NSFile::CFileBinary::SaveToFile(m_sDstFilePath, sRes, true);
		}
	}

	// Replaces the generic "application/octet-stream" type with a specific
	// one: HTML when "<HTML" / "<html" occurs within the first 100 bytes of
	// the data, otherwise a type derived from the extension of the content
	// location (.js, .png, .jpg/.jpeg, .gif, .css). Other types are kept.
	void CorrectType()
	{
		if (m_sContentType != "application/octet-stream")
			return;

		const std::string::size_type posUpper = m_sData.find("<HTML");
		const std::string::size_type posLower = m_sData.find("<html");
		if ((posUpper != std::string::npos && posUpper < 100) ||
			(posLower != std::string::npos && posLower < 100))
		{
			m_sContentType = Names::htmlFileType;
			return;
		}

		std::wstring::size_type posExt = m_sContentLocation.rfind('.');
		if (posExt == std::wstring::npos)
			return;

		// extension without a query string ("?...")
		std::wstring sExt = m_sContentLocation.substr(posExt);
		posExt = sExt.find('?');
		if (std::wstring::npos != posExt)
			sExt = sExt.substr(0, posExt);

		if (sExt == L".js")
			m_sContentType = Names::jsFileType;
		else if (sExt == L".png")
			m_sContentType = "image/png";
		else if (sExt == L".jpg" || sExt == L".jpeg")
			m_sContentType = "image/jpg";
		else if (sExt == L".gif")
			m_sContentType = "image/gif";
		else if (sExt == L".css")
			m_sContentType = Names::cssFileType;
	}
};
class CMhtFile
{
public:
CInnerFile m_oFile;
std::list<CInnerFile> m_arFiles;
std::wstring m_sFolder;
std::map<std::wstring, std::wstring> m_sUrlMap;
NSStringUtils::CStringBuilder m_oBuilder; // temp builder
NSUnicodeConverter::CUnicodeConverter m_oUnicodeConverter;
std::string m_sEncoding;
public:
// Prepares a unique temporary working folder (m_sFolder) and sets the
// default charset to "latin1".
CMhtFile()
{
	m_sEncoding = "latin1";

	const std::wstring sTempRoot = NSFile::CFileBinary::GetTempPath();
	m_sFolder = NSFile::CFileBinary::CreateTempFileWithUniqueName(sTempRoot, L"MHT");

	// On Linux the call above actually creates a file with that name; it has
	// to be removed before a folder with the same name can be created.
	const bool bFileCreated = NSFile::CFileBinary::Exists(m_sFolder);
	if (bFileCreated)
		NSFile::CFileBinary::Remove(m_sFolder);

	NSDirectory::CreateDirectory(m_sFolder);
}
// Deletes the temporary working folder created by the constructor.
~CMhtFile()
{
NSDirectory::DeleteDirectory(m_sFolder);
}
std::string ReadFile(const std::wstring& sFileSrc)
{
BYTE* pData = NULL;
DWORD dwSize = 0;
NSFile::CFileBinary::ReadAllBytes(sFileSrc, &pData, dwSize);
DWORD nBomSize = 0;
if (dwSize >= 4)
{
DWORD dwBOM = 0;
dwBOM |= pData[0];
dwBOM |= (pData[1] << 8);
dwBOM |= (pData[2] << 16);
dwBOM |= (pData[3] << 24);
if (0x00BFBBEF == (dwBOM & 0x00FFFFFF))
{
m_sEncoding = "UTF-8";
nBomSize = 3;
}
else if (0x0000FFFE == (dwBOM & 0x0000FFFF))
{
m_sEncoding = "UTF-16BE";
nBomSize = 2;
}
else if (0x0000FEFF == (dwBOM & 0x0000FFFF))
{
m_sEncoding = "UTF-16LE";
nBomSize = 2;
}
}
return std::string((char*)(pData + nBomSize), (dwSize - nBomSize));
}
// Returns the value (0..15) of a hexadecimal digit.
// Both uppercase and lowercase digits are accepted; any other character
// yields 0.
int charFromHex ( const char& _char)
{
	if (_char >= '0' && _char <= '9')
		return _char - '0';
	if (_char >= 'A' && _char <= 'F')
		return _char - 'A' + 10;
	if (_char >= 'a' && _char <= 'f')
		return _char - 'a' + 10;
	return 0;
}
// Decodes one quoted-printable encoded line.
//  - "=XX" (two hex digits, either case) becomes the byte 0xXX, including
//    0x00 - the result is never cut at a decoded NUL;
//  - '=' at the very end of the line or followed by a line break is a soft
//    line break and produces no output;
//  - a '=' that starts no valid escape is copied literally.
std::string decodingQuotedPrintable(const std::string& line)
{
	// value of a hex digit or -1 when `c` is not one
	auto hexValue = [](char c) -> int
	{
		if (c >= '0' && c <= '9')
			return c - '0';
		if (c >= 'A' && c <= 'F')
			return c - 'A' + 10;
		if (c >= 'a' && c <= 'f')
			return c - 'a' + 10;
		return -1;
	};

	const size_t nLength = line.length();

	std::string result;
	result.reserve(nLength);

	for (size_t i = 0; i < nLength; ++i)
	{
		const char c = line[i];
		if (c != '=')
		{
			result.push_back(c);
			continue;
		}

		const size_t nLeft = nLength - i - 1; // characters after '='
		if (0 == nLeft)
			break; // trailing '=': soft line break

		if (line[i + 1] == '\r' || line[i + 1] == '\n')
		{
			// soft line break: skip CR LF (or a lone CR / LF)
			i += (line[i + 1] == '\r' && nLeft >= 2 && line[i + 2] == '\n') ? 2 : 1;
			continue;
		}

		if (nLeft >= 2)
		{
			const int hi = hexValue(line[i + 1]);
			const int lo = hexValue(line[i + 2]);
			if (hi >= 0 && lo >= 0)
			{
				result.push_back(static_cast<char>(16 * hi + lo));
				i += 2;
				continue;
			}
		}

		// not an escape sequence - keep the character as it is
		result.push_back('=');
	}
	return result;
}
void Convert()
{
// сначала делаем мап файлов
int nNumber = 0;
for (std::list<CInnerFile>::iterator i = m_arFiles.begin(); i != m_arFiles.end(); i++)
{
nNumber++;
CInnerFile* pFile = i.operator ->();
std::wstring sFileExt = L".png"; // L".bin" - обычно это картинки. Так и будем сохранять
if (pFile->m_sContentType.find(Names::cssFileType) != std::string::npos)
{
sFileExt = L".css";
}
else if (pFile->m_sContentType.find(Names::imageFileType) != std::string::npos)
{
if (pFile->m_sContentType.find("png") != std::string::npos)
sFileExt = L".png";
else if (pFile->m_sContentType.find("gif") != std::string::npos)
sFileExt = L".gif";
else
sFileExt = L".jpg";
}
else if (pFile->m_sContentType.find("xml") != std::string::npos)
{
sFileExt = L".xml";
}
else if (pFile->m_sContentType.find("html") != std::string::npos)
{
sFileExt = L".html";
}
else if (pFile->m_sContentType.find("javascript") != std::string::npos)
{
sFileExt = L".js";
}
std::wstring sUrl = L"/" + std::to_wstring(nNumber) + sFileExt;
pFile->m_sDstFilePath = m_sFolder + sUrl;
m_sUrlMap.insert(std::pair<std::wstring, std::wstring>(pFile->m_sContentLocation, L"." + sUrl));
}
for (std::list<CInnerFile>::iterator i = m_arFiles.begin(); i != m_arFiles.end(); i++)
{
i->Save(m_sUrlMap, &m_oUnicodeConverter);
}
m_oFile.m_sDstFilePath = m_sFolder + L"/index.html";
m_oFile.Save(m_sUrlMap, &m_oUnicodeConverter);
}
// Returns a copy of `sSrc` in which every character has been passed through
// easytolower (only 'A'..'Z' change).
inline std::string GetLower(const std::string& sSrc)
{
	std::string sLowered;
	sLowered.reserve(sSrc.size());
	for (std::string::const_iterator it = sSrc.begin(); it != sSrc.end(); ++it)
		sLowered.push_back(easytolower(*it));
	return sLowered;
}
// Wide-string overload: returns a copy of `sSrc` in which every character
// has been passed through easytolower_w (only L'A'..L'Z' change).
inline std::wstring GetLower(const std::wstring& sSrc)
{
	std::wstring sLowered;
	sLowered.reserve(sSrc.size());
	for (std::wstring::const_iterator it = sSrc.begin(); it != sSrc.end(); ++it)
		sLowered.push_back(easytolower_w(*it));
	return sLowered;
}
// Extracts a header value that starts at `pos` in `line`: leading spaces and
// double quotes are skipped, the value ends at the first ';' or '"' or at the
// end of the line.
// Returns an empty string when `pos` is at or beyond the end of the line.
std::string ParseFilePropertyA(const std::string& line, std::string::size_type pos)
{
	const std::string::size_type nLength = line.length();
	if (pos >= nLength)
		return std::string();

	// bounds are checked before the character is read
	std::string::size_type _first = pos;
	while (_first < nLength && (line[_first] == ' ' || line[_first] == '\"'))
		++_first;

	std::string::size_type _last = line.find_first_of(";\"", _first);
	if (std::string::npos == _last)
		_last = nLength;

	return line.substr(_first, _last - _first);
}
// Wide-string variant of ParseFilePropertyA: extracts a header value that
// starts at `pos` in `line`. Leading spaces and double quotes are skipped,
// the value ends at the first ';' or '"' or at the end of the line.
// Returns an empty string when `pos` is at or beyond the end of the line.
std::wstring ParseFileProperty(const std::wstring& line, std::wstring::size_type pos)
{
	const std::wstring::size_type nLength = line.length();
	if (pos >= nLength)
		return std::wstring();

	// bounds are checked before the character is read
	std::wstring::size_type _first = pos;
	while (_first < nLength && (line[_first] == L' ' || line[_first] == L'\"'))
		++_first;

	std::wstring::size_type _last = line.find_first_of(L";\"", _first);
	if (std::wstring::npos == _last)
		_last = nLength;

	return line.substr(_first, _last - _first);
}
// Looks for sProperty inside sSrcLower. When it is present, the text that follows it is
// extracted from sSrcNatural (at the same offset) with ParseFilePropertyA and stored in sValue.
// Returns true when the property was found; sValue is left untouched otherwise.
bool CheckProperty(const std::string& sSrcLower, const std::string& sSrcNatural, const std::string& sProperty, std::string& sValue)
{
    const std::string::size_type nNamePos = sSrcLower.find(sProperty);
    if (nNamePos == std::string::npos)
        return false;

    const std::string::size_type nValuePos = nNamePos + sProperty.length();
    sValue = this->ParseFilePropertyA(sSrcNatural, nValuePos);
    return true;
}
// Same lookup as CheckProperty, but the extracted value is converted with
// m_oUnicodeConverter.toUnicode (using m_sEncoding) before being stored in sValue.
// Returns true when the property was found; sValue is left untouched otherwise.
bool CheckPropertyW(const std::string& sSrcLower, const std::string& sSrcNatural, const std::string& sProperty, std::wstring& sValue)
{
    const std::string::size_type nNamePos = sSrcLower.find(sProperty);
    if (nNamePos == std::string::npos)
        return false;

    const std::string::size_type nValuePos = nNamePos + sProperty.length();
    sValue = m_oUnicodeConverter.toUnicode(this->ParseFilePropertyA(sSrcNatural, nValuePos), m_sEncoding.c_str());
    return true;
}
void Parse(const std::wstring& sFileSrc)
{
std::string sFileData = this->ReadFile(sFileSrc);
if (sFileData.empty())
return;
std::list<std::string> content;
char* pChars = (char*)sFileData.c_str();
int nLenSrc = (int)sFileData.length();
int nPrevNewLine = 0;
NSStringUtils::CStringBuilderA oBuilderA;
for (int i = 0; i < nLenSrc; ++i)
{
oBuilderA.ClearNoAttack();
while (i < nLenSrc)
{
if (pChars[i] == '\r')
{
content.push_back(oBuilderA.GetData());
++i;
nPrevNewLine = i;
break;
}
// BAD symbols \x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19
if (pChars[i] >= 0x0A && pChars[i] <= 0x19)
{
++i;
continue;
}
oBuilderA.AddCharSafe(pChars[i]);
++i;
}
}
content.push_back(oBuilderA.GetData());
std::string boundary;
std::wstring doc_location;
//пробегаемся по строкам файла MHT
for (std::list<std::string>::iterator i = content.begin(); i != content.end();)
{
// конвертируем строку с кодировкой файла
std::string sLowerLine = GetLower(*i);
//Ищем инициализацию boundary в шапке документа MHT(boundary - разделитель внутренних файлов) - обязательный параметр
if (CheckProperty(sLowerLine, *i, Names::boundary_str, boundary))
{
boundary = "--" + boundary;
i++;
}
//Ищем инициализацию contentLocation в шапке(наименование главного внутренний файла) - может отсутствовать
else if (CheckPropertyW(sLowerLine, *i, Names::contentLocation_str, doc_location))
{
i++;
}
//если встретили разделитель, то начинаем считывать новый внутренний файл
else if (*i == boundary && !boundary.empty())
{
CInnerFile oInnerFile;
//сначала считывается шапка внутреннего файла, которая отделяется от основного текста файлом как минимум одной пустой строкой
while ( i->length() != 0 )
{
sLowerLine = GetLower(*i);
// Проверяем, возможно разделитель поменялся с данного места
if (CheckProperty(sLowerLine, *i, Names::boundary_str, boundary))
{
boundary = "--" + boundary;
}
//тип файла (image/, text/html, text/css)
else if (CheckProperty(sLowerLine, sLowerLine, Names::contentType_str, oInnerFile.m_sContentType))
{
if (oInnerFile.m_sContentType.find(Names::htmlFileType) != std::string::npos)
{
if (sLowerLine.find(".gif") != std::string::npos)
oInnerFile.m_sContentType = "image/gif";
else if (sLowerLine.find(".png") != std::string::npos)
oInnerFile.m_sContentType = "image/png";
else if (sLowerLine.find(".jpg") != std::string::npos || sLowerLine.find(".jpeg") != std::string::npos)
oInnerFile.m_sContentType = "image/jpg";
}
}
//наименование файла
else if (CheckPropertyW(sLowerLine, *i, Names::contentLocation_str, oInnerFile.m_sContentLocation)) {}
else if (CheckPropertyW(sLowerLine, *i, Names::contentID_str, oInnerFile.m_sContentID)) {}
//кодировка (base64, 8bit, quoted-printable)
else if (CheckProperty(sLowerLine, sLowerLine, Names::contentTransferEncoding_str, oInnerFile.m_sContentEncoding)) {}
else if (CheckProperty(sLowerLine, sLowerLine, Names::contentCharset_str, oInnerFile.m_sEncoding)) {}
i++;
}
while ( i->length() == 0)
i++;
oBuilderA.ClearNoAttack();
bool bIs16 = (oInnerFile.m_sContentEncoding.find(Names::code_QuotedPrintable) != std::string::npos) ? true : false;
bool bIsBase64 = (oInnerFile.m_sContentEncoding.find(Names::code_Base64) != std::string::npos) ? true : false;
while (i != content.end() && i->find(boundary) == std::string::npos)
{
if (bIs16)
{
oBuilderA.WriteString(decodingQuotedPrintable(*i++));
}
else
{
oBuilderA.WriteString(*i++);
}
if (!bIsBase64)
oBuilderA.AddCharSafe('\n');
}
oInnerFile.m_sData = oBuilderA.GetData();
oInnerFile.CorrectType();
if (m_oFile.m_sData.empty() && oInnerFile.m_sContentType.find(Names::htmlFileType) != std::wstring::npos)
{
m_oFile = oInnerFile;
}
else if (m_oFile.m_sData.empty() && oInnerFile.m_sContentType.find(Names::xmlFileType) != std::wstring::npos)
{
m_oFile = oInnerFile;
}
else
{
m_arFiles.push_back(oInnerFile);
}
}
else
i++;
}
//встречаются такие документы, где отсутсвует boundary
if (boundary == "")
{
for(std::list<std::string>::iterator i = content.begin(); i != content.end();)
{
CInnerFile oInnerFile;
//сначала считывается шапка внутреннего файла, которая отделяется от основного текста файлом как минимум одной пустой строкой
while ( i->length() != 0 )
{
std::string sLowerLine = GetLower(*i);
if (CheckProperty(sLowerLine, sLowerLine, Names::contentType_str, oInnerFile.m_sContentType))
{
if (oInnerFile.m_sContentType.find(Names::htmlFileType) != std::string::npos)
{
if (sLowerLine.find(".gif") != std::string::npos)
oInnerFile.m_sContentType = "image/gif";
else if (sLowerLine.find(".png") != std::string::npos)
oInnerFile.m_sContentType = "image/png";
else if (sLowerLine.find(".jpg") != std::string::npos || sLowerLine.find(".jpeg") != std::string::npos)
oInnerFile.m_sContentType = "image/jpg";
}
}
//наименование файла
else if (CheckPropertyW(sLowerLine, *i, Names::contentLocation_str, oInnerFile.m_sContentLocation)) {}
else if (CheckPropertyW(sLowerLine, *i, Names::contentID_str, oInnerFile.m_sContentID)) {}
//кодировка (base64, 8bit, quoted-printable)
else if (CheckProperty(sLowerLine, sLowerLine, Names::contentTransferEncoding_str, oInnerFile.m_sContentEncoding)) {}
else if (CheckProperty(sLowerLine, sLowerLine, Names::contentCharset_str, oInnerFile.m_sEncoding)) {}
i++;
}
if (oInnerFile.m_sContentType.empty())
oInnerFile.m_sContentType = Names::htmlFileType;
if (oInnerFile.m_sContentEncoding.empty())
oInnerFile.m_sContentEncoding = Names::code_QuotedPrintable;
while ( i->length() == 0)
i++;
oBuilderA.ClearNoAttack();
bool bIs16 = (oInnerFile.m_sContentEncoding.find(Names::code_QuotedPrintable) != std::string::npos) ? true : false;
bool bIsBase64 = (oInnerFile.m_sContentEncoding.find(Names::code_Base64) != std::string::npos) ? true : false;
while (i != content.end() && i->find(boundary) == std::string::npos)
{
if (bIs16)
{
oBuilderA.WriteString(decodingQuotedPrintable(*i++));
}
else
{
oBuilderA.WriteString(*i++);
}
if (!bIsBase64)
oBuilderA.AddCharSafe('\n');
}
oInnerFile.m_sData = oBuilderA.GetData();
oInnerFile.CorrectType();
if (m_oFile.m_sData.empty() && oInnerFile.m_sContentType.find(Names::htmlFileType) != std::wstring::npos)
{
m_oFile = oInnerFile;
}
else if (m_oFile.m_sData.empty() && oInnerFile.m_sContentType.find(Names::xmlFileType) != std::wstring::npos)
{
m_oFile = oInnerFile;
}
else
{
m_arFiles.push_back(oInnerFile);
}
}
}
}
};
}
int CHtmlFile::ConvertMht(const std::wstring& sFile, const std::wstring& sDstfolder)
{
NSMht::CMhtFile oFile;
oFile.Parse(sFile);
oFile.Convert();
std::wstring sFileMht = oFile.m_sFolder + L"/index.html";
std::vector<std::wstring> arFiles;
arFiles.push_back(sFileMht);
return this->Convert(arFiles, sDstfolder);
}