Skip to content

Commit b4759be

Browse files
committed
gcc 4.8 compile fix
make ProxyIterator::reference a reference (not a value) in order to truly satisfy random access iterator requirements, because std algs were trying to std::swap rvalues added swap(ProxyIterator &,ProxyIterator &) and (shallow) swap of SizedProxy.Inner() to go with existing deep swap for SizedProxy; I know there are other ProxyIterators but the code I'm building doesn't need swap for those. added a //TODO: comment with a concern I have about SizedProxy's deep swap (which was only recently added); I (graehl) don't understand KenLM well enough to judge. avoid some new gcc 4.8 warnings made compile_query_only.sh respect CXX env var
1 parent acbf805 commit b4759be

8 files changed

+72
-44
lines changed

clean_query_only.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
#!/bin/bash
2-
rm -rf {lm,util,util/double-conversion}/*.o bin/{query,kenlm_max_order,build_binary}
2+
rm -rf {lm,util,util/double-conversion}/*.o bin/{query,build_binary}

compile_query_only.sh

+8-4
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,21 @@ echo Note: You must use ./bjam if you want language model estimation or filterin
66
rm {lm,util}/*.o 2>/dev/null
77
set -e
88

9-
CXXFLAGS="-I. -O3 -DNDEBUG -DKENLM_MAX_ORDER=6 $CXXFLAGS"
9+
CXX=${CXX:-g++}
10+
CXXFLAGS+=" -I. -O3 -DNDEBUG -DKENLM_MAX_ORDER=6"
11+
echo '$CXX $CXXFLAGS'
12+
echo $CXX $CXXFLAGS
1013

1114
#Grab all cc files in these directories except those ending in test.cc or main.cc
1215
objects=""
1316
for i in util/double-conversion/*.cc util/*.cc lm/*.cc; do
1417
if [ "${i%test.cc}" == "$i" ] && [ "${i%main.cc}" == "$i" ]; then
15-
g++ $CXXFLAGS -c $i -o ${i%.cc}.o
18+
$CXX $CXXFLAGS -c $i -o ${i%.cc}.o
1619
objects="$objects ${i%.cc}.o"
1720
fi
1821
done
1922

2023
mkdir -p bin
21-
g++ $CXXFLAGS lm/build_binary_main.cc $objects -lrt -o bin/build_binary
22-
g++ $CXXFLAGS lm/query_main.cc $objects -lrt -o bin/query
24+
[[ `uname` = Darwin ]] || CXXFLAGS+=" -lrt"
25+
$CXX $CXXFLAGS lm/build_binary_main.cc $objects $lrt -o bin/build_binary
26+
$CXX $CXXFLAGS lm/query_main.cc $objects $lrt -o bin/query

lm/search_hashed.cc

+11-12
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ template <class Weights> class ActivateUnigram {
5454
Weights *modify_;
5555
};
5656

57-
// Find the lower order entry, inserting blanks along the way as necessary.
57+
// Find the lower order entry, inserting blanks along the way as necessary.
5858
template <class Value> void FindLower(
5959
const std::vector<uint64_t> &keys,
6060
typename Value::Weights &unigram,
@@ -64,7 +64,7 @@ template <class Value> void FindLower(
6464
typename Value::ProbingEntry entry;
6565
// Backoff will always be 0.0. We'll get the probability and rest in another pass.
6666
entry.value.backoff = kNoExtensionBackoff;
67-
// Go back and find the longest right-aligned entry, informing it that it extends left. Normally this will match immediately, but sometimes SRI is dumb.
67+
// Go back and find the longest right-aligned entry, informing it that it extends left. Normally this will match immediately, but sometimes SRI is dumb.
6868
for (int lower = keys.size() - 2; ; --lower) {
6969
if (lower == -1) {
7070
between.push_back(&unigram);
@@ -77,11 +77,11 @@ template <class Value> void FindLower(
7777
}
7878
}
7979

80-
// Between usually has single entry, the value to adjust. But sometimes SRI stupidly pruned entries so it has unitialized blank values to be set here.
80+
// Between usually has single entry, the value to adjust. But sometimes SRI stupidly pruned entries so it has unitialized blank values to be set here.
8181
template <class Added, class Build> void AdjustLower(
8282
const Added &added,
8383
const Build &build,
84-
std::vector<typename Build::Value::Weights *> &between,
84+
std::vector<typename Build::Value::Weights *> &between,
8585
const unsigned int n,
8686
const std::vector<WordIndex> &vocab_ids,
8787
typename Build::Value::Weights *unigrams,
@@ -93,14 +93,14 @@ template <class Added, class Build> void AdjustLower(
9393
}
9494
typedef util::ProbingHashTable<typename Value::ProbingEntry, util::IdentityHash> Middle;
9595
float prob = -fabs(between.back()->prob);
96-
// Order of the n-gram on which probabilities are based.
96+
// Order of the n-gram on which probabilities are based.
9797
unsigned char basis = n - between.size();
9898
assert(basis != 0);
9999
typename Build::Value::Weights **change = &between.back();
100100
// Skip the basis.
101101
--change;
102102
if (basis == 1) {
103-
// Hallucinate a bigram based on a unigram's backoff and a unigram probability.
103+
// Hallucinate a bigram based on a unigram's backoff and a unigram probability.
104104
float &backoff = unigrams[vocab_ids[1]].backoff;
105105
SetExtension(backoff);
106106
prob += backoff;
@@ -128,14 +128,14 @@ template <class Added, class Build> void AdjustLower(
128128
typename std::vector<typename Value::Weights *>::const_iterator i(between.begin());
129129
build.MarkExtends(**i, added);
130130
const typename Value::Weights *longer = *i;
131-
// Everything has probability but is not marked as extending.
131+
// Everything has probability but is not marked as extending.
132132
for (++i; i != between.end(); ++i) {
133133
build.MarkExtends(**i, *longer);
134134
longer = *i;
135135
}
136136
}
137137

138-
// Continue marking lower entries even they know that they extend left. This is used for upper/lower bounds.
138+
// Continue marking lower entries even they know that they extend left. This is used for upper/lower bounds.
139139
template <class Build> void MarkLower(
140140
const std::vector<uint64_t> &keys,
141141
const Build &build,
@@ -145,7 +145,7 @@ template <class Build> void MarkLower(
145145
const typename Build::Value::Weights &longer) {
146146
if (start_order == 0) return;
147147
typename util::ProbingHashTable<typename Build::Value::ProbingEntry, util::IdentityHash>::MutableIterator iter;
148-
// Hopefully the compiler will realize that if MarkExtends always returns false, it can simplify this code.
148+
// Hopefully the compiler will realize that if MarkExtends always returns false, it can simplify this code.
149149
for (int even_lower = start_order - 2 /* index in middle */; ; --even_lower) {
150150
if (even_lower == -1) {
151151
build.MarkExtends(unigram, longer);
@@ -168,7 +168,6 @@ template <class Build, class Activate, class Store> void ReadNGrams(
168168
Store &store,
169169
PositiveProbWarn &warn) {
170170
typedef typename Build::Value Value;
171-
typedef util::ProbingHashTable<typename Value::ProbingEntry, util::IdentityHash> Middle;
172171
assert(n >= 2);
173172
ReadNGramHeader(f, n);
174173

@@ -186,7 +185,7 @@ template <class Build, class Activate, class Store> void ReadNGrams(
186185
for (unsigned int h = 1; h < n - 1; ++h) {
187186
keys[h] = detail::CombineWordHash(keys[h-1], vocab_ids[h+1]);
188187
}
189-
// Initially the sign bit is on, indicating it does not extend left. Most already have this but there might +0.0.
188+
// Initially the sign bit is on, indicating it does not extend left. Most already have this but there might +0.0.
190189
util::SetSign(entry.value.prob);
191190
entry.key = keys[n-2];
192191

@@ -203,7 +202,7 @@ template <class Build, class Activate, class Store> void ReadNGrams(
203202

204203
} // namespace
205204
namespace detail {
206-
205+
207206
template <class Value> uint8_t *HashedSearch<Value>::SetupMemory(uint8_t *start, const std::vector<uint64_t> &counts, const Config &config) {
208207
std::size_t allocated = Unigram::Size(counts[0]);
209208
unigram_ = Unigram(start, counts[0], allocated);

lm/search_hashed.hh

+7-6
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ template <class Value> class HashedSearch {
7171
static const bool kDifferentRest = Value::kDifferentRest;
7272
static const unsigned int kVersion = 0;
7373

74-
// TODO: move probing_multiplier here with next binary file format update.
74+
// TODO: move probing_multiplier here with next binary file format update.
7575
static void UpdateConfigFromBinary(int, const std::vector<uint64_t> &, Config &) {}
7676

7777
static uint64_t Size(const std::vector<uint64_t> &counts, const Config &config) {
@@ -103,6 +103,7 @@ template <class Value> class HashedSearch {
103103
}
104104

105105
#pragma GCC diagnostic ignored "-Wuninitialized"
106+
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
106107
MiddlePointer Unpack(uint64_t extend_pointer, unsigned char extend_length, Node &node) const {
107108
node = extend_pointer;
108109
typename Middle::ConstIterator found;
@@ -126,14 +127,14 @@ template <class Value> class HashedSearch {
126127
}
127128

128129
LongestPointer LookupLongest(WordIndex word, const Node &node) const {
129-
// Sign bit is always on because longest n-grams do not extend left.
130+
// Sign bit is always on because longest n-grams do not extend left.
130131
typename Longest::ConstIterator found;
131132
if (!longest_.Find(CombineWordHash(node, word), found)) return LongestPointer();
132133
return LongestPointer(found->value.prob);
133134
}
134135

135-
// Generate a node without necessarily checking that it actually exists.
136-
// Optionally return false if it's know to not exist.
136+
// Generate a node without necessarily checking that it actually exists.
137+
// Optionally return false if it's know to not exist.
137138
bool FastMakeNode(const WordIndex *begin, const WordIndex *end, Node &node) const {
138139
assert(begin != end);
139140
node = static_cast<Node>(*begin);
@@ -144,7 +145,7 @@ template <class Value> class HashedSearch {
144145
}
145146

146147
private:
147-
// Interpret config's rest cost build policy and pass the right template argument to ApplyBuild.
148+
// Interpret config's rest cost build policy and pass the right template argument to ApplyBuild.
148149
void DispatchBuild(util::FilePiece &f, const std::vector<uint64_t> &counts, const Config &config, const ProbingVocabulary &vocab, PositiveProbWarn &warn);
149150

150151
template <class Build> void ApplyBuild(util::FilePiece &f, const std::vector<uint64_t> &counts, const ProbingVocabulary &vocab, PositiveProbWarn &warn, const Build &build);
@@ -153,7 +154,7 @@ template <class Value> class HashedSearch {
153154
public:
154155
Unigram() {}
155156

156-
Unigram(void *start, uint64_t count, std::size_t /*allocated*/) :
157+
Unigram(void *start, uint64_t count, std::size_t /*allocated*/) :
157158
unigram_(static_cast<typename Value::Weights*>(start))
158159
#ifdef DEBUG
159160
, count_(count)

util/double-conversion/utils.h

+5-1
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,11 @@ template <class Dest, class Source>
299299
inline Dest BitCast(const Source& source) {
300300
// Compile time assertion: sizeof(Dest) == sizeof(Source)
301301
// A compile error here means your Dest and Source have different sizes.
302-
typedef char VerifySizesAreEqual[sizeof(Dest) == sizeof(Source) ? 1 : -1];
302+
typedef char VerifySizesAreEqual[sizeof(Dest) == sizeof(Source) ? 1 : -1]
303+
#if __GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 8
304+
__attribute__((unused))
305+
#endif
306+
;
303307

304308
Dest dest;
305309
memmove(&dest, &source, sizeof(dest));

util/file.cc

+7-7
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ std::size_t GuardLarge(std::size_t size) {
116116
// The following operating systems have broken read/write/pread/pwrite that
117117
// only supports up to 2^31.
118118
#if defined(_WIN32) || defined(_WIN64) || defined(__APPLE__) || defined(OS_ANDROID)
119-
return std::min(static_cast<std::size_t>(INT_MAX), size);
119+
return std::min(static_cast<std::size_t>(static_cast<unsigned>(-1)), size);
120120
#else
121121
return size;
122122
#endif
@@ -209,7 +209,7 @@ void WriteOrThrow(int fd, const void *data_void, std::size_t size) {
209209
#endif
210210
errno = 0;
211211
do {
212-
ret =
212+
ret =
213213
#if defined(_WIN32) || defined(_WIN64)
214214
_write
215215
#else
@@ -229,7 +229,7 @@ void WriteOrThrow(FILE *to, const void *data, std::size_t size) {
229229
}
230230

231231
void FSyncOrThrow(int fd) {
232-
// Apparently windows doesn't have fsync?
232+
// Apparently windows doesn't have fsync?
233233
#if !defined(_WIN32) && !defined(_WIN64)
234234
UTIL_THROW_IF_ARG(-1 == fsync(fd), FDException, (fd), "while syncing");
235235
#endif
@@ -248,7 +248,7 @@ template <> struct CheckOffT<8> {
248248
typedef CheckOffT<sizeof(off_t)>::True IgnoredType;
249249
#endif
250250

251-
// Can't we all just get along?
251+
// Can't we all just get along?
252252
void InternalSeek(int fd, int64_t off, int whence) {
253253
if (
254254
#if defined(_WIN32) || defined(_WIN64)
@@ -457,9 +457,9 @@ bool TryName(int fd, std::string &out) {
457457
std::ostringstream convert;
458458
convert << fd;
459459
name += convert.str();
460-
460+
461461
struct stat sb;
462-
if (-1 == lstat(name.c_str(), &sb))
462+
if (-1 == lstat(name.c_str(), &sb))
463463
return false;
464464
out.resize(sb.st_size + 1);
465465
ssize_t ret = readlink(name.c_str(), &out[0], sb.st_size + 1);
@@ -471,7 +471,7 @@ bool TryName(int fd, std::string &out) {
471471
}
472472
out.resize(ret);
473473
// Don't use the non-file names.
474-
if (!out.empty() && out[0] != '/')
474+
if (!out.empty() && out[0] != '/')
475475
return false;
476476
return true;
477477
#endif

util/proxy_iterator.hh

+15-10
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,11 @@
66

77
/* This is a RandomAccessIterator that uses a proxy to access the underlying
88
* data. Useful for packing data at bit offsets but still using STL
9-
* algorithms.
9+
* algorithms.
1010
*
1111
* Normally I would use boost::iterator_facade but some people are too lazy to
1212
* install boost and still want to use my language model. It's amazing how
13-
* many operators an iterator has.
13+
* many operators an iterator has.
1414
*
1515
* The Proxy needs to provide:
1616
* class InnerIterator;
@@ -22,32 +22,37 @@
2222
* operator<(InnerIterator)
2323
* operator+=(std::ptrdiff_t)
2424
* operator-(InnerIterator)
25-
* and of course whatever Proxy needs to dereference it.
25+
* and of course whatever Proxy needs to dereference it.
2626
*
27-
* It's also a good idea to specialize std::swap for Proxy.
27+
* It's also a good idea to specialize std::swap for Proxy.
2828
*/
2929

3030
namespace util {
3131
template <class Proxy> class ProxyIterator {
3232
private:
33-
// Self.
33+
// Self.
3434
typedef ProxyIterator<Proxy> S;
3535
typedef typename Proxy::InnerIterator InnerIterator;
3636

3737
public:
3838
typedef std::random_access_iterator_tag iterator_category;
3939
typedef typename Proxy::value_type value_type;
4040
typedef std::ptrdiff_t difference_type;
41-
typedef Proxy reference;
41+
typedef Proxy & reference;
4242
typedef Proxy * pointer;
4343

4444
ProxyIterator() {}
4545

46-
// For cast from non const to const.
46+
// For cast from non const to const.
4747
template <class AlternateProxy> ProxyIterator(const ProxyIterator<AlternateProxy> &in) : p_(*in) {}
4848
explicit ProxyIterator(const Proxy &p) : p_(p) {}
4949

50-
// p_'s operator= does value copying, but here we want iterator copying.
50+
// p_'s swap does value swapping, but here we want iterator swapping
51+
friend inline void swap(ProxyIterator<Proxy> &first, ProxyIterator<Proxy> &second) {
52+
swap(first.I(), second.I());
53+
}
54+
55+
// p_'s operator= does value copying, but here we want iterator copying.
5156
S &operator=(const S &other) {
5257
I() = other.I();
5358
return *this;
@@ -72,8 +77,8 @@ template <class Proxy> class ProxyIterator {
7277

7378
std::ptrdiff_t operator-(const S &other) const { return I() - other.I(); }
7479

75-
Proxy operator*() { return p_; }
76-
const Proxy operator*() const { return p_; }
80+
Proxy &operator*() { return p_; }
81+
const Proxy &operator*() const { return p_; }
7782
Proxy *operator->() { return &p_; }
7883
const Proxy *operator->() const { return &p_; }
7984
Proxy operator[](std::ptrdiff_t amount) const { return *(*this + amount); }

util/sized_iterator.hh

+18-3
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,11 @@ class SizedInnerIterator {
3636
void *Data() { return ptr_; }
3737
std::size_t EntrySize() const { return size_; }
3838

39+
friend inline void swap(SizedInnerIterator &first, SizedInnerIterator &second) {
40+
std::swap(first.ptr_, second.ptr_);
41+
std::swap(first.size_, second.size_);
42+
}
43+
3944
private:
4045
uint8_t *ptr_;
4146
std::size_t size_;
@@ -64,9 +69,19 @@ class SizedProxy {
6469
const void *Data() const { return inner_.Data(); }
6570
void *Data() { return inner_.Data(); }
6671

72+
/**
73+
// TODO: this (deep) swap was recently added. why? if any std heap sort etc
74+
// algs are using swap, that's going to be worse performance than using
75+
// =. i'm not sure why we *want* a deep swap. if C++11 compilers are
76+
// choosing between move constructor and swap, then we'd better implement a
77+
// (deep) move constructor. it may also be that this is moot since i made
78+
// ProxyIterator a reference and added a shallow ProxyIterator swap? (I
79+
// need Ken or someone competent to judge whether that's correct also. -
80+
// let me know at [email protected]
81+
*/
6782
friend void swap(SizedProxy &first, SizedProxy &second) {
6883
std::swap_ranges(
69-
static_cast<char*>(first.inner_.Data()),
84+
static_cast<char*>(first.inner_.Data()),
7085
static_cast<char*>(first.inner_.Data()) + first.inner_.EntrySize(),
7186
static_cast<char*>(second.inner_.Data()));
7287
}
@@ -87,7 +102,7 @@ typedef ProxyIterator<SizedProxy> SizedIterator;
87102

88103
inline SizedIterator SizedIt(void *ptr, std::size_t size) { return SizedIterator(SizedProxy(ptr, size)); }
89104

90-
// Useful wrapper for a comparison function i.e. sort.
105+
// Useful wrapper for a comparison function i.e. sort.
91106
template <class Delegate, class Proxy = SizedProxy> class SizedCompare : public std::binary_function<const Proxy &, const Proxy &, bool> {
92107
public:
93108
explicit SizedCompare(const Delegate &delegate = Delegate()) : delegate_(delegate) {}
@@ -106,7 +121,7 @@ template <class Delegate, class Proxy = SizedProxy> class SizedCompare : public
106121
}
107122

108123
const Delegate &GetDelegate() const { return delegate_; }
109-
124+
110125
private:
111126
const Delegate delegate_;
112127
};

0 commit comments

Comments
 (0)