forked from mirrors/0ad
fd3d335265
Fixes random crashes on Windows due to differences of STL types on the binary level between different static libraries. The differences are caused by the definition of _HAS_ITERATOR_DEBUGGING and _SECURE_SCL in lib/precompiled.h. All our engine static libs use precompiled headers but we didn't use them for tinygettext because they aren't needed (except for these two defines). I've decided to add precompiled headers to tinygettext too instead of just copying these defines. That should cause less headaches in the future. This was SVN commit r14983.
499 lines
12 KiB
C++
499 lines
12 KiB
C++
// tinygettext - A gettext replacement that works directly on .po files
|
|
// Copyright (C) 2009 Ingo Ruhnke <grumbel@gmx.de>
|
|
//
|
|
// This program is free software; you can redistribute it and/or
|
|
// modify it under the terms of the GNU General Public License
|
|
// as published by the Free Software Foundation; either version 2
|
|
// of the License, or (at your option) any later version.
|
|
//
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU General Public License
|
|
// along with this program; if not, write to the Free Software
|
|
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
|
|
|
#include "precompiled.h"
|
|
|
|
#include "po_parser.hpp"
|
|
|
|
#include <iostream>
|
|
#include <ctype.h>
|
|
#include <string>
|
|
#include <istream>
|
|
#include <string.h>
|
|
#include <map>
|
|
#include <stdlib.h>
|
|
|
|
#include "language.hpp"
|
|
#include "log_stream.hpp"
|
|
#include "iconv.hpp"
|
|
#include "dictionary.hpp"
|
|
#include "plural_forms.hpp"
|
|
|
|
namespace tinygettext {
|
|
|
|
// Strictness switch, enabled by default. When set, get_string() additionally
// warns about spacing deviations between a keyword and its string.
bool POParser::pedantic = true;
|
|
|
|
void
|
|
POParser::parse(const std::string& filename, std::istream& in, Dictionary& dict, bool use_fuzzy_)
|
|
{
|
|
POParser parser(filename, in, dict, use_fuzzy_);
|
|
parser.parse();
|
|
}
|
|
|
|
// Empty tag type thrown by POParser::error() and caught in POParser::parse()
// to abandon the entry currently being parsed.
class POParserError {};
|
|
|
|
// Binds the parser to its input stream and target dictionary. All parsing
// state starts out cleared: line counter at zero, not at EOF, Big5 handling
// off, empty current line and a default-constructed converter.
POParser::POParser(const std::string& filename_, std::istream& in_, Dictionary& dict_, bool use_fuzzy_)
  : filename(filename_), in(in_), dict(dict_), use_fuzzy(use_fuzzy_),
    running(false), eof(false), big5(false),
    line_number(0),
    current_line(), conv()
{}
|
|
|
|
// Empty body: no explicit cleanup is performed here.
POParser::~POParser() {}
|
|
|
|
void
|
|
POParser::warning(const std::string& msg)
|
|
{
|
|
log_warning << filename << ":" << line_number << ": warning: " << msg << ": " << current_line << std::endl;
|
|
//log_warning << "Line: " << current_line << std::endl;
|
|
}
|
|
|
|
void
|
|
POParser::error(const std::string& msg)
|
|
{
|
|
log_error << filename << ":" << line_number << ": error: " << msg << ": " << current_line << std::endl;
|
|
|
|
// Try to recover from an error by searching for start of another entry
|
|
do
|
|
next_line();
|
|
while(!eof && !is_empty_line());
|
|
|
|
throw POParserError();
|
|
}
|
|
|
|
void
|
|
POParser::next_line()
|
|
{
|
|
line_number += 1;
|
|
if (!std::getline(in, current_line))
|
|
eof = true;
|
|
}
|
|
|
|
void
|
|
POParser::get_string_line(std::ostringstream& out,std::string::size_type skip)
|
|
{
|
|
if (skip+1 >= static_cast<std::string::size_type>(current_line.size()))
|
|
error("unexpected end of line");
|
|
|
|
if (current_line[skip] != '"')
|
|
error("expected start of string '\"'");
|
|
|
|
std::string::size_type i;
|
|
for(i = skip+1; current_line[i] != '\"'; ++i)
|
|
{
|
|
if (big5 && static_cast<unsigned char>(current_line[i]) >= 0x81 && static_cast<unsigned char>(current_line[i]) <= 0xfe)
|
|
{
|
|
out << current_line[i];
|
|
|
|
i += 1;
|
|
|
|
if (i >= current_line.size())
|
|
error("invalid big5 encoding");
|
|
|
|
out << current_line[i];
|
|
}
|
|
else if (i >= current_line.size())
|
|
{
|
|
error("unexpected end of string");
|
|
}
|
|
else if (current_line[i] == '\\')
|
|
{
|
|
i += 1;
|
|
|
|
if (i >= current_line.size())
|
|
error("unexpected end of string in handling '\\'");
|
|
|
|
switch (current_line[i])
|
|
{
|
|
case 'a': out << '\a'; break;
|
|
case 'b': out << '\b'; break;
|
|
case 'v': out << '\v'; break;
|
|
case 'n': out << '\n'; break;
|
|
case 't': out << '\t'; break;
|
|
case 'r': out << '\r'; break;
|
|
case '"': out << '"'; break;
|
|
case '\\': out << '\\'; break;
|
|
default:
|
|
std::ostringstream err;
|
|
err << "unhandled escape '\\" << current_line[i] << "'";
|
|
warning(err.str());
|
|
|
|
out << current_line[i-1] << current_line[i];
|
|
break;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
out << current_line[i];
|
|
}
|
|
}
|
|
|
|
// process trailing garbage in line and warn if there is any
|
|
for(i = i+1; i < current_line.size(); ++i)
|
|
if (!isspace(current_line[i]))
|
|
{
|
|
warning("unexpected garbage after string ignoren");
|
|
break;
|
|
}
|
|
}
|
|
|
|
std::string
|
|
POParser::get_string(std::string::size_type skip)
|
|
{
|
|
std::ostringstream out;
|
|
|
|
if (skip+1 >= static_cast<std::string::size_type>(current_line.size()))
|
|
error("unexpected end of line");
|
|
|
|
if (current_line[skip] == ' ' && current_line[skip+1] == '"')
|
|
{
|
|
get_string_line(out, skip+1);
|
|
}
|
|
else
|
|
{
|
|
if (pedantic)
|
|
warning("keyword and string must be seperated by a single space");
|
|
|
|
for(;;)
|
|
{
|
|
if (skip >= static_cast<std::string::size_type>(current_line.size()))
|
|
error("unexpected end of line");
|
|
else if (current_line[skip] == '\"')
|
|
{
|
|
get_string_line(out, skip);
|
|
break;
|
|
}
|
|
else if (!isspace(current_line[skip]))
|
|
{
|
|
error("string must start with '\"'");
|
|
}
|
|
else
|
|
{
|
|
// skip space
|
|
}
|
|
|
|
skip += 1;
|
|
}
|
|
}
|
|
|
|
next:
|
|
next_line();
|
|
for(std::string::size_type i = 0; i < current_line.size(); ++i)
|
|
{
|
|
if (current_line[i] == '"')
|
|
{
|
|
if (i == 1)
|
|
if (pedantic)
|
|
warning("leading whitespace before string");
|
|
|
|
get_string_line(out, i);
|
|
goto next;
|
|
}
|
|
else if (isspace(current_line[i]))
|
|
{
|
|
// skip
|
|
}
|
|
else
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
|
|
return out.str();
|
|
}
|
|
|
|
// Returns true when `lhs` begins with `rhs`. An empty `rhs` is a prefix of
// every string.
static bool has_prefix(const std::string& lhs, const std::string rhs)
{
  return lhs.length() >= rhs.length()
      && lhs.compare(0, rhs.length(), rhs) == 0;
}
|
|
|
|
void
|
|
POParser::parse_header(const std::string& header)
|
|
{
|
|
std::string from_charset;
|
|
std::string::size_type start = 0;
|
|
for(std::string::size_type i = 0; i < header.length(); ++i)
|
|
{
|
|
if (header[i] == '\n')
|
|
{
|
|
std::string line = header.substr(start, i - start);
|
|
|
|
if (has_prefix(line, "Content-Type:"))
|
|
{
|
|
// from_charset = line.substr(len);
|
|
std::string::size_type len = strlen("Content-Type: text/plain; charset=");
|
|
if (line.compare(0, len, "Content-Type: text/plain; charset=") == 0)
|
|
{
|
|
from_charset = line.substr(len);
|
|
|
|
for(std::string::iterator ch = from_charset.begin(); ch != from_charset.end(); ++ch)
|
|
*ch = static_cast<char>(toupper(*ch));
|
|
}
|
|
else
|
|
{
|
|
warning("malformed Content-Type header");
|
|
}
|
|
}
|
|
else if (has_prefix(line, "Plural-Forms:"))
|
|
{
|
|
PluralForms plural_forms = PluralForms::from_string(line);
|
|
if (!plural_forms)
|
|
{
|
|
warning("unknown Plural-Forms given");
|
|
}
|
|
else
|
|
{
|
|
if (!dict.get_plural_forms())
|
|
{
|
|
dict.set_plural_forms(plural_forms);
|
|
}
|
|
else
|
|
{
|
|
if (dict.get_plural_forms() != plural_forms)
|
|
{
|
|
warning("Plural-Forms missmatch between .po file and dictionary");
|
|
}
|
|
}
|
|
}
|
|
}
|
|
start = i+1;
|
|
}
|
|
}
|
|
|
|
if (from_charset.empty() || from_charset == "CHARSET")
|
|
{
|
|
warning("charset not specified for .po, fallback to utf-8");
|
|
from_charset = "UTF-8";
|
|
}
|
|
else if (from_charset == "BIG5")
|
|
{
|
|
big5 = true;
|
|
}
|
|
|
|
conv.set_charsets(from_charset, dict.get_charset());
|
|
}
|
|
|
|
bool
|
|
POParser::is_empty_line()
|
|
{
|
|
if (current_line.empty())
|
|
{
|
|
return true;
|
|
}
|
|
else if (current_line[0] == '#')
|
|
{ // handle comments as empty lines
|
|
if (current_line.size() == 1 || (current_line.size() >= 2 && isspace(current_line[1])))
|
|
return true;
|
|
else
|
|
return false;
|
|
}
|
|
else
|
|
{
|
|
for(std::string::iterator i = current_line.begin(); i != current_line.end(); ++i)
|
|
{
|
|
if (!isspace(*i))
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
POParser::prefix(const char* prefix_str)
|
|
{
|
|
return current_line.compare(0, strlen(prefix_str), prefix_str) == 0;
|
|
}
|
|
|
|
void
|
|
POParser::parse()
|
|
{
|
|
next_line();
|
|
|
|
// skip UTF-8 intro that some text editors produce
|
|
// see http://en.wikipedia.org/wiki/Byte-order_mark
|
|
if (current_line.size() >= 3 &&
|
|
current_line[0] == static_cast<char>(0xef) &&
|
|
current_line[1] == static_cast<char>(0xbb) &&
|
|
current_line[2] == static_cast<char>(0xbf))
|
|
{
|
|
current_line = current_line.substr(3);
|
|
}
|
|
|
|
// Parser structure
|
|
while(!eof)
|
|
{
|
|
try
|
|
{
|
|
bool fuzzy = false;
|
|
bool has_msgctxt = false;
|
|
std::string msgctxt;
|
|
std::string msgid;
|
|
|
|
while(prefix("#"))
|
|
{
|
|
if (current_line.size() >= 2 && current_line[1] == ',')
|
|
{
|
|
// FIXME: Rather simplistic hunt for fuzzy flag
|
|
if (current_line.find("fuzzy", 2) != std::string::npos)
|
|
fuzzy = true;
|
|
}
|
|
|
|
next_line();
|
|
}
|
|
|
|
if (!is_empty_line())
|
|
{
|
|
if (prefix("msgctxt"))
|
|
{
|
|
has_msgctxt = true;
|
|
msgctxt = get_string(7);
|
|
}
|
|
|
|
if (prefix("msgid"))
|
|
msgid = get_string(5);
|
|
else
|
|
error("expected 'msgid'");
|
|
|
|
if (prefix("msgid_plural"))
|
|
{
|
|
std::string msgid_plural = get_string(12);
|
|
std::vector<std::string> msgstr_num;
|
|
bool saw_nonempty_msgstr = false;
|
|
|
|
next:
|
|
if (is_empty_line())
|
|
{
|
|
if (msgstr_num.empty())
|
|
error("expected 'msgstr[N] (0 <= N <= 9)'");
|
|
}
|
|
else if (prefix("msgstr[") &&
|
|
current_line.size() > 8 &&
|
|
isdigit(current_line[7]) && current_line[8] == ']')
|
|
{
|
|
std::string::size_type number = static_cast<std::string::size_type>(current_line[7] - '0');
|
|
std::string msgstr = get_string(9);
|
|
|
|
if(!msgstr.empty())
|
|
saw_nonempty_msgstr = true;
|
|
|
|
if (number >= msgstr_num.size())
|
|
msgstr_num.resize(number+1);
|
|
|
|
msgstr_num[number] = conv.convert(msgstr);
|
|
goto next;
|
|
}
|
|
else
|
|
{
|
|
error("expected 'msgstr[N]'");
|
|
}
|
|
|
|
if (!is_empty_line())
|
|
error("expected 'msgstr[N]' or empty line");
|
|
|
|
if (saw_nonempty_msgstr)
|
|
{
|
|
if (use_fuzzy || !fuzzy)
|
|
{
|
|
if (!dict.get_plural_forms())
|
|
{
|
|
warning("msgstr[N] seen, but no Plural-Forms given");
|
|
}
|
|
else
|
|
{
|
|
if (msgstr_num.size() != dict.get_plural_forms().get_nplural())
|
|
{
|
|
warning("msgstr[N] count doesn't match Plural-Forms.nplural");
|
|
}
|
|
}
|
|
|
|
if (has_msgctxt)
|
|
dict.add_translation(msgctxt, msgid, msgid_plural, msgstr_num);
|
|
else
|
|
dict.add_translation(msgid, msgid_plural, msgstr_num);
|
|
}
|
|
|
|
if (0)
|
|
{
|
|
std::cout << (fuzzy?"fuzzy":"not-fuzzy") << std::endl;
|
|
std::cout << "msgid \"" << msgid << "\"" << std::endl;
|
|
std::cout << "msgid_plural \"" << msgid_plural << "\"" << std::endl;
|
|
for(std::vector<std::string>::size_type i = 0; i < msgstr_num.size(); ++i)
|
|
std::cout << "msgstr[" << i << "] \"" << conv.convert(msgstr_num[i]) << "\"" << std::endl;
|
|
std::cout << std::endl;
|
|
}
|
|
}
|
|
}
|
|
else if (prefix("msgstr"))
|
|
{
|
|
std::string msgstr = get_string(6);
|
|
|
|
if (msgid.empty())
|
|
{
|
|
parse_header(msgstr);
|
|
}
|
|
else if(!msgstr.empty())
|
|
{
|
|
if (use_fuzzy || !fuzzy)
|
|
{
|
|
if (has_msgctxt)
|
|
dict.add_translation(msgctxt, msgid, conv.convert(msgstr));
|
|
else
|
|
dict.add_translation(msgid, conv.convert(msgstr));
|
|
}
|
|
|
|
if (0)
|
|
{
|
|
std::cout << (fuzzy?"fuzzy":"not-fuzzy") << std::endl;
|
|
std::cout << "msgid \"" << msgid << "\"" << std::endl;
|
|
std::cout << "msgstr \"" << conv.convert(msgstr) << "\"" << std::endl;
|
|
std::cout << std::endl;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
error("expected 'msgstr' or 'msgid_plural'");
|
|
}
|
|
}
|
|
|
|
if (!is_empty_line())
|
|
error("expected empty line");
|
|
|
|
next_line();
|
|
}
|
|
catch(POParserError&)
|
|
{
|
|
}
|
|
}
|
|
}
|
|
|
|
} // namespace tinygettext
|
|
|
|
/* EOF */
|