Skip to content

Commit bead852

Browse files
committed
fixed XML-entity decoding error
1 parent f78dc1a commit bead852

File tree

3 files changed

+20
-6
lines changed

3 files changed

+20
-6
lines changed

Changelog

+6
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
-------------------------------
2+
Version 7.15 (19 October 2016)
3+
-------------------------------
4+
Fixed error in the decoding of XML-entities if the decoded string comprised
5+
more than one byte.
6+
17
-------------------------------
28
Version 7.14 (13 October 2016)
39
-------------------------------

src/cstlemma.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ You should have received a copy of the GNU General Public License
1919
along with CSTLEMMA; if not, write to the Free Software
2020
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
2121
*/
22-
#define CSTLEMMAVERSION "7.14"
23-
#define CSTLEMMADATE "2016.10.13"
22+
#define CSTLEMMAVERSION "7.15"
23+
#define CSTLEMMADATE "2016.10.19"
2424
#define CSTLEMMACOPYRIGHT "2002-2016 Center for Sprogteknologi"
2525

2626
#include "lemmatiser.h"

src/wordReader.cpp

+12-4
Original file line numberDiff line numberDiff line change
@@ -562,11 +562,19 @@ int wordReader::rawput(bool (wordReader::*fnc)(int kar),int kar)
562562
int wordReader::nrawput(bool (wordReader::*fnc)(int kar),char * c)
563563
{
564564
while(*c)
565-
if(!rawput(fnc,*c++))
566-
return false;
565+
//if(!rawput(fnc,*c++))
566+
rawput(fnc,*c++);
567567
return true;
568568
}
569569

570+
int myUnicodeToUtf8(int w,char * s,size_t len)
571+
{
572+
int writtenlength = UnicodeToUtf8(w,s,len);
573+
if(writtenlength >= 0)
574+
s[writtenlength] = '\0';
575+
return writtenlength;
576+
}
577+
570578
int wordReader::charref(bool (wordReader::*fnc)(int kar),int kar)
571579
{
572580
if(kar == ';')
@@ -594,7 +602,7 @@ int wordReader::charref(bool (wordReader::*fnc)(int kar),int kar)
594602
N = (buf[1] == 'x') ? strtoul(buf+2,NULL,16) : strtoul(buf+1,NULL,10);
595603
p = buf;
596604
xput = &wordReader::Put;
597-
if(UnicodeToUtf8(N,tmp,sizeof(tmp)))
605+
if(myUnicodeToUtf8(N,tmp,sizeof(tmp)))
598606
{
599607
return nrawput(fnc,tmp);
600608
}
@@ -619,7 +627,7 @@ int wordReader::charref(bool (wordReader::*fnc)(int kar),int kar)
619627
char tmp[22];
620628
p = buf;
621629
xput = &wordReader::Put;
622-
if(UnicodeToUtf8(pItem->code,tmp,sizeof(tmp)))
630+
if(myUnicodeToUtf8(pItem->code,tmp,sizeof(tmp)))
623631
{
624632
return nrawput(fnc,tmp);
625633
}

0 commit comments

Comments
 (0)