-
Notifications
You must be signed in to change notification settings - Fork 15
/
BaseClassifier.pas
939 lines (782 loc) · 30.1 KB
/
BaseClassifier.pas
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
// ###################################################################
// #### This file is part of the artificial intelligence project, and is
// #### offered under the licence agreement described on
// #### http://www.mrsoft.org/
// ####
// #### Copyright: (c) 2014, Michael R. All rights reserved.
// ####
// #### Unless required by applicable law or agreed to in writing, software
// #### distributed under the License is distributed on an "AS IS" BASIS,
// #### WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// #### See the License for the specific language governing permissions and
// #### limitations under the License.
// ###################################################################
unit BaseClassifier;
// #############################################################
// #### Defines an interface for classification algorithms used in the
// #### Boosting algorithms
// #############################################################
interface
uses SysUtils, Classes, Contnrs, Types, BaseMathPersistence, RandomEng;
type
// progress callback used during learning (fired via TCommonLearnerProps.DoProgress)
TOnLearnIteration = procedure(Sender : TObject; progress : integer) of Object;
ECustomClassifierException = class(Exception);
// array of integer index arrays - e.g. one index list per class
TIntIntArray = Array of TIntegerDynArray;
// #############################################################
// #### most basic element - can be used in an example
// Abstract feature vector: descendants define the actual storage and access.
TCustomFeatureList = class(TObject)
protected
fFeatureVecLen : integer;
function GetFeature(index : integer) : double; virtual; abstract;
procedure SetFeature(index : integer; value : double); virtual; abstract;
public
// default indexed property -> features can be read as aList[i]
property FeatureVec[index : integer] : double read GetFeature write SetFeature; default;
property FeatureVecLen : integer read fFeatureVecLen;
// replaces the complete feature vector content
procedure SetFeatureVec(const Feature : Array of Double); virtual; abstract;
end;
// #############################################################
// #### Base class for all examples - just an abstraction to get a base class
type
// Couples a feature vector with an ownership flag: when owning, the
// feature list is freed together with the example (see Destroy).
TCustomExample = class(TObject)
private
fFeatureVec : TCustomFeatureList;
fOwnsFeature : boolean;
public
property FeatureVec : TCustomFeatureList read fFeatureVec;
constructor Create(aFeatureVec : TCustomFeatureList; ownsFeatureVec : boolean);
destructor Destroy; override;
end;
type
// Example with an attached integer class label - the element used for
// supervised learning.
TCustomLearnerExample = class(TCustomExample)
private
fClassVal : integer;
public
// creates a copy that SHARES the feature vector (clone does not own it)
function Clone : TCustomLearnerExample; virtual;
property ClassVal : integer read fClassVal write fClassVal;
end;
TCustomLearnerExampleClass = class of TCustomLearnerExample;
// #############################################################
// #### List of examples - used in the training phase
type
// Typed TObjectList of TCustomExample items.
TCustomExampleList = class(TObjectList)
private
function GetExample(index : integer) : TCustomExample;
procedure SetExample(index : integer; Value : TCustomExample);
public
property Example[index : integer] : TCustomExample read GetExample write SetExample;
procedure Add(Exmpl : TCustomExample);
end;
// #############################################################
// #### A dataset of weighted examples
type
// Example list with helpers to build random / balanced / train-validation
// subsets. All Create*DataSet results only REFERENCE the examples
// (OwnsObjects = False on the returned lists).
TCustomLearnerExampleList = class(TObjectList)
private
fRandomAlg : TRandomAlgorithm;
fRandom : TRandomGenerator;
function InternalRandomDataSet(LearningSet : TCustomLearnerExampleList; StartIdx, EndIdx : integer; numElements : integer) : TCustomLearnerExampleList;
procedure SetRandomAlg(const Value: TRandomAlgorithm);
function GetExample(index : integer) : TCustomLearnerExample;
procedure SetExample(index : integer; Value : TCustomLearnerExample);
public
// splits into train/validation subsets; 0 and 100 percent are special
// cased (validation set becomes a full copy of the train set)
procedure CreateTrainAndValidationSet(validationDataSetPerc : integer; out trainSet, validationSet : TCustomLearnerExampleList);
// subset with (at most) as many examples per class as the smallest class has
function CreateBalancedDataSet : TCustomLearnerExampleList;
// random subset holding Percentage percent of the examples (no duplicates)
function CreateRandomDataSet(Percentage : integer) : TCustomLearnerExampleList;
function CreateRandomizedBalancedDataSet(Percentage : integer) : TCustomLearnerExampleList;
// clone without examples
function CloneBase : TCustomLearnerExampleList; virtual;
// returns a random permutation of [0..Count-1]; the list itself is unchanged
function Shuffle : TIntegerDynArray; // randomizes all the examples
// lazily created random generator (deterministically seeded with 0)
function Rand : TRandomGenerator;
property Example[index : integer] : TCustomLearnerExample read GetExample write SetExample; default;
procedure Add(Exmpl : TCustomLearnerExample);
// returns the number of distinct class labels; classVals receives the labels
function NumClasses(var classVals : TIntegerDynArray) : integer;
property RandomAlg : TRandomAlgorithm read fRandomAlg write SetRandomAlg;
constructor Create;
destructor Destroy; override;
end;
TCustomLearnerExampleListClass = class of TCustomLearnerExampleList;
// #############################################################
// #### Base classifier class
type
// Base class of all classifiers. Derives from TBaseMathPersistence so a
// trained classifier can be stored and restored.
TCustomClassifier = class(TBaseMathPersistence)
public
function Classify(Example : TCustomExample; var confidence : double) : integer; overload; virtual; abstract;
// convenience overload that discards the confidence value
function Classify(Example : TCustomExample) : integer; overload; virtual;
end;
TCustomClassifierClass = class of TCustomClassifier;
// ######################################################
// #### Base progress class for learning a classification rule
type
// Common base for learners: progress event plumbing.
TCommonLearnerProps = class(TObject)
private
fOnLearnProgress : TOnLearnIteration;
protected
// fires OnLearnProgress if a handler is assigned
procedure DoProgress(progress : integer);
public
property OnLearnProgress : TOnLearnIteration read fOnLearnProgress write fOnLearnProgress;
class function CanLearnClassifier(Classifier : TCustomClassifierClass) : boolean; virtual; abstract;
end;
// ######################################################
// #### All classifiers from this type support weighted examples
// in their learning steps - these types of classifiers are
// from interest when learning the adaboost classifier.
type
TCustomWeightedLearner = class(TCommonLearnerProps)
private
fDataSet : TCustomLearnerExampleList;
protected
// data set assigned via Init - NOT owned by the learner
property DataSet : TCustomLearnerExampleList read fDataSet;
function DoLearn(const weights : Array of double) : TCustomClassifier; virtual; abstract;
// returns the indices of a sorted array of features starting from the lowest
function CalcSortIdx(featureIdx : integer) : TIntegerDynArray; overload;
function CalcSortIdx(const dataSetIdx : TIntegerDynArray; featureIdx : integer) : TIntegerDynArray; overload;
// counting sort variant for integer valued features (small value ranges)
function CountSortIdx(const dataSetIdx: TIntegerDynArray; featureIdx: integer): TIntegerDynArray;
procedure IdxCountSort(const Values: TIntegerDynArray; var Idx: TIntegerDynArray; Min, Max: integer); // by reference
procedure IdxQuickSort(const Values : TDoubleDynArray; var Idx : TIntegerDynArray; L, R : integer);
// builds one example-index list per distinct class; returns the class count
function IndexOfClasses(var Idx : TIntIntArray; var classes : TIntegerDynArray) : integer;
// distinct class labels in order of first occurrence
function Classes : TIntegerDynArray;
public
// assigns (does not take ownership of) the data set used by Learn
procedure Init(aDataSet : TCustomLearnerExampleList); virtual;
function Learn(const weights : Array of double) : TCustomClassifier; overload;
function Learn : TCustomClassifier; overload;
end;
TCustomWeightedLearnerClass = class of TCustomWeightedLearner;
// ######################################################
// #### Base classifier which cannot handle weighting in the example list
type
TCustomLearner = class(TCustomWeightedLearner)
private
fOrigDataSet : TCustomLearnerExampleList;
// creates a new example list and adds already existing examples from the given list:
// weighting is achieved by duplicating
// items. e.g. if count=3 and weights are 0.66, 0.17, 0.17, then the
// result is count=5, example[0] x 3, example[1] x 1, example[2] x 1
// algorithm is:
// take max weight and divide by 100: -> use that as minimum allowed weight and
// discard all examples lower than that weight.
// take the remaining lowest weight -> this example is put one time into the resulting
// array.
procedure BuildWeightedList(const weights : Array of double);
protected
// learning routine for algorithms which cannot handle weights directly
function DoUnweightedLearn : TCustomClassifier; virtual; abstract;
function DoLearn(const weights : Array of double) : TCustomClassifier; override;
end;
TCustomLearnerClass = class of TCustomLearner;
implementation
uses Math, BaseMatrixExamples, Matrix;
{ TCustomExampleList }
// Typed front end for TObjectList.Add - keeps the public API in terms of
// TCustomExample instead of raw TObject.
procedure TCustomExampleList.Add(Exmpl: TCustomExample);
begin
   inherited Add(TObject(Exmpl));
end;
// Property getter: returns the stored item typed as TCustomExample.
function TCustomExampleList.GetExample(index: integer): TCustomExample;
var obj : TObject;
begin
   obj := Items[index];
   assert(obj is TCustomExample, 'Item is not a feature');
   Result := TCustomExample(obj);
end;
// Property setter: stores Value at the given slot.
procedure TCustomExampleList.SetExample(index: integer; Value: TCustomExample);
begin
   Items[index] := Value;
end;
{ TCommonLearnerProps }
// Fires the OnLearnProgress event (if a handler is assigned) with the
// current progress value.
procedure TCommonLearnerProps.DoProgress(progress: integer);
begin
   if Assigned(fOnLearnProgress) then
      fOnLearnProgress(Self, progress);
end;
{ TCustomLearnerExampleList }
// Typed front end for TObjectList.Add.
procedure TCustomLearnerExampleList.Add(Exmpl: TCustomLearnerExample);
begin
   inherited Add(TObject(Exmpl));
end;
// Property getter. Hard cast without an is-check - this is a hot path and
// the list only ever stores TCustomLearnerExample instances.
function TCustomLearnerExampleList.GetExample(
  index: integer): TCustomLearnerExample;
begin
   Result := TCustomLearnerExample(Items[index]);
end;
// Determines the distinct class labels in the list.
// Returns the number of distinct labels; classVals is resized to hold exactly
// one entry per distinct class value (in order of first occurrence).
function TCustomLearnerExampleList.NumClasses(
var classVals: TIntegerDynArray): integer;
var counter, clCnt : integer;
begin
SetLength(classVals, 2);
// determine number of classes in the dataset
Result := 0;
for counter := 0 to Count - 1 do
begin
// tentatively append the current label ...
classVals[Result] := Example[counter].ClassVal;
inc(Result);
// ... and drop it again if it is already known
for clCnt := 0 to Result - 2 do
begin
if classVals[clCnt] = classVals[Result - 1] then
begin
dec(Result);
break;
end;
end;
// grow (double) the buffer when it is full
if Result = Length(classVals) then
SetLength(classVals, 2*Length(classVals));
end;
// shrink to the exact number of distinct classes
SetLength(classVals, Result);
end;
// Property setter: stores Value at the given slot.
procedure TCustomLearnerExampleList.SetExample(index: integer;
  Value: TCustomLearnerExample);
begin
   Items[index] := TObject(Value);
end;
constructor TCustomLearnerExampleList.Create;
begin
// the list owns (frees) its examples by default
inherited Create(True);
fRandomAlg := raMersenneTwister;
end;
// Creates a new list that references (does not own) numElements distinct,
// randomly chosen examples from LearningSet[StartIdx..EndIdx]. The whole
// index range is shuffled (Fisher-Yates) and the first numElements entries
// are taken -> no duplicate entries can occur.
function TCustomLearnerExampleList.InternalRandomDataSet(LearningSet : TCustomLearnerExampleList; StartIdx, EndIdx : integer; numElements : integer) : TCustomLearnerExampleList;
var idx : Array of integer;
    i : Integer;
    index : integer;
    tmp : integer;
begin
   // ensure that no double entries exists: build the identity permutation
   // over [StartIdx..EndIdx]
   SetLength(idx, EndIdx - StartIdx + 1);
   for i := StartIdx to EndIdx do
      idx[i - StartIdx] := i;

   // Fisher yates shuffle:
   for i := Length(idx) - 1 downto 1 do
   begin
      index := Rand.RandInt(i + 1);
      tmp := idx[index];
      idx[index] := idx[i];
      idx[i] := tmp;
   end;

   // now create the resulting array - the result only references the
   // examples, ownership stays with the source list
   Result := TCustomLearnerExampleList.Create; // LearningSet.ClassType.Create as TCustomLearnerExampleList;
   Result.OwnsObjects := False;
   // was Length(idx): only numElements items are actually added
   Result.Capacity := numElements;
   for i := 0 to numElements - 1 do
      Result.Add(LearningSet[idx[i]]);
end;
// Returns a new (non owning) list holding Percentage percent of the
// examples, chosen at random without duplicates. Returns nil when the
// computed element count is negative (negative percentage).
function TCustomLearnerExampleList.CreateRandomDataSet(Percentage : integer) : TCustomLearnerExampleList;
var cnt : integer;
begin
   cnt := Min(Count, (Percentage*Count) div 100);
   if cnt < 0 then
   begin
      Result := nil;
      exit;
   end;

   Result := InternalRandomDataSet(self, 0, Count - 1, cnt);
end;
// Comparison callback for TObjectList.Sort: orders examples ascending by
// their class label. Uses explicit comparisons instead of the previous
// subtraction (ClassVal1 - ClassVal2), which could overflow for extreme
// label values and yield a wrong sign.
function ClassSort(Item1, Item2 : Pointer) : integer;
var c1, c2 : integer;
begin
   c1 := TCustomLearnerExample(Item1).ClassVal;
   c2 := TCustomLearnerExample(Item2).ClassVal;
   if c1 < c2
   then
       Result := -1
   else if c1 > c2
   then
       Result := 1
   else
       Result := 0;
end;
// Returns a new (non owning) class balanced data set: every class contributes
// at most as many examples as the smallest class has. Returns nil when the
// list is empty.
function TCustomLearnerExampleList.CreateBalancedDataSet : TCustomLearnerExampleList;
var classes : Array of integer;
numCl : integer;
i : integer;
copyList : TCustomLearnerExampleList;
minNumElem : integer;
actNumElem : integer;
actClass : integer;
begin
Result := nil;
if Count = 0 then
exit;
// we only want to store references to the examples in the new data set
// NOTE(review): ClassType.Create calls the non virtual TObject constructor,
// so TCustomLearnerExampleList.Create does not run here (fields stay zeroed);
// OwnsObjects is set explicitly below - confirm this is intended.
copyList := ClassType.Create as TCustomLearnerExampleList;
try
copyList.OwnsObjects := False;
copyList.Capacity := Count;
// first check out the number of classes and the number of elements belonging to these classes
for i := 0 to Count - 1 do
copyList.Add(Example[i]);
// sort by class label so equal labels become adjacent
copyList.Sort({$IFDEF FPC}@{$ENDIF}ClassSort);
// classes[k] counts the elements of the k-th distinct class
// (SetLength zero initializes newly added slots)
SetLength(classes, 10);
numCl := 1;
classes[0] := 1;
for i := 1 to copyList.Count - 1 do
begin
if copyList[i].ClassVal <> copyList[i - 1].ClassVal then
begin
inc(numCl);
if numCl >= Length(classes) then
SetLength(classes, Min(2*Length(classes), Length(classes) + 1000));
end;
inc(classes[numCl - 1]);
end;
// search for the class with the lowest number of elements
minNumElem := classes[0];
for i := 1 to numCl - 1 do
minNumElem := Min(minNumElem, classes[i]);
// create the resulting list:
// the result does not own the examples either - they stay owned by self
Result := TCustomLearnerExampleList.Create; //ClassType.Create as TCustomLearnerExampleList;
Result.OwnsObjects := False;
Result.Capacity := minNumElem*numCl;
// walk the sorted copy and take at most minNumElem elements of each class
actNumElem := 0;
actClass := 0;
for i := 0 to copyList.Count - 1 do
begin
if actNumElem = classes[actClass] then
begin
inc(actClass);
actNumElem := 0;
end;
if actNumElem < minNumElem then
Result.Add(copyList[i]);
inc(actNumElem);
end;
finally
copyList.Free;
end;
end;
// Like CreateBalancedDataSet, but the per-class contribution is additionally
// scaled by Percentage (clamped to [0..100]) and the elements of each class
// are picked at random. Returns nil when the list is empty.
function TCustomLearnerExampleList.CreateRandomizedBalancedDataSet(Percentage : integer) : TCustomLearnerExampleList;
var classes : Array of integer;
numCl : integer;
i, j : integer;
copyList : TCustomLearnerExampleList;
minNumElem : integer;
actNumElem : integer;
actClass : integer;
aList : TCustomLearnerExampleList;
begin
Result := nil;
if Count = 0 then
exit;
// we only want to store references to the examples in the new data set
copyList := TCustomLearnerExampleList.Create; //ClassType.Create as TCustomLearnerExampleList;
try
copyList.OwnsObjects := False;
copyList.Capacity := Count;
// first check out the number of classes and the number of elements belonging to these classes
for i := 0 to Count - 1 do
copyList.Add(Example[i]);
// sort by class label so equal labels become adjacent
copyList.Sort({$IFDEF FPC}@{$ENDIF}ClassSort);
// classes[k] counts the elements of the k-th distinct class
SetLength(classes, 10);
numCl := 1;
classes[0] := 1;
for i := 1 to copyList.Count - 1 do
begin
if copyList[i].ClassVal <> copyList[i - 1].ClassVal then
begin
inc(numCl);
if numCl >= Length(classes) then
SetLength(classes, Min(2*Length(classes), Length(classes) + 1000));
end;
inc(classes[numCl - 1]);
end;
// search for the class with the lowest number of elements
minNumElem := classes[0];
for i := 1 to numCl - 1 do
minNumElem := Min(minNumElem, classes[i]);
// scale the per-class element count by the (clamped) percentage
minNumElem := (minNumElem*Max(0, Min(100, Percentage))) div 100;
// create the resulting list:
Result := TCustomLearnerExampleList.Create; //ClassType.Create as TCustomLearnerExampleList;
Result.OwnsObjects := False;
Result.Capacity := minNumElem*numCl;
// pick minNumElem random elements from every class block of the sorted copy
actNumElem := 0;
actClass := 0;
for i := 0 to numCl - 1 do
begin
// this line ensures that consecutive calls to this routine does not result in the same resulting dataset
aList := InternalRandomDataSet(copyList, actNumElem, actNumElem + Classes[actClass] - 1, minNumElem);
try
for j := 0 to aList.Count - 1 do
Result.Add(aList[j]);
finally
aList.Free;
end;
inc(actNumElem, Classes[actClass]);
inc(actClass);
end;
finally
copyList.Free;
end;
end;
// Lazily creates the random generator using the configured algorithm.
// The generator is seeded with the fixed value 0 -> deterministic sequences.
function TCustomLearnerExampleList.Rand: TRandomGenerator;
begin
   if fRandom = nil then
   begin
      fRandom := TRandomGenerator.Create;
      fRandom.RandMethod := RandomAlg;
      fRandom.Init(0);
   end;

   Result := fRandom;
end;
// Changing the algorithm invalidates the current generator; it is recreated
// with the new method on the next call to Rand. (FreeAndNil is a no-op on nil.)
procedure TCustomLearnerExampleList.SetRandomAlg(const Value: TRandomAlgorithm);
begin
   fRandomAlg := Value;
   FreeAndNil(fRandom);
end;
destructor TCustomLearnerExampleList.Destroy;
begin
   // fRandom is created lazily in Rand and may still be nil here
   FreeAndNil(fRandom);

   inherited Destroy;
end;
// Splits the examples into a train and a validation set (references only).
// validationDataSetPerc percent of the (shuffled) examples are moved to the
// validation set. For 0 or 100 percent the validation set becomes a full
// copy of the train set instead.
procedure TCustomLearnerExampleList.CreateTrainAndValidationSet(
validationDataSetPerc : integer; out trainSet,
validationSet: TCustomLearnerExampleList);
var counter : integer;
numValidationElem : integer;
begin
assert((validationDataSetPerc <= 100) and (validationDataSetPerc >= 0), 'Percentage needs to be between 0 and 100');
// CreateRandomDataSet(100) yields a shuffled, non owning copy of all examples
trainSet := CreateRandomDataSet(100);
validationSet := self.ClassType.Create as TCustomLearnerExampleList;
// special case: 0 or 100 - just clone the train set: validation set == trainSet
if (validationDataSetPerc = 0) or (validationDataSetPerc = 100) then
begin
validationSet.Capacity := trainSet.Count;
for counter := 0 to trainset.Count - 1 do
validationSet.Add(trainset[counter]);
end
else
begin
// move the last numValidationElem shuffled entries to the validation set
numValidationElem := Max(1, Floor(validationDataSetPerc/100*trainSet.Count));
for counter := 0 to numValidationElem - 1 do
begin
validationSet.Add(trainSet[trainSet.Count - 1 - counter]);
trainSet[trainSet.Count - 1 - counter] := nil;
end;
// delete last elements
trainSet.Pack;
end;
end;
// Returns a random permutation of [0..Count-1] built with a Fisher-Yates
// shuffle. Only the returned index array is shuffled - the list itself is
// left untouched.
function TCustomLearnerExampleList.Shuffle : TIntegerDynArray;
var k : integer;
    swapIdx : integer;
    buf : integer;
begin
   SetLength(Result, Count);
   for k := 0 to Count - 1 do
      Result[k] := k;

   // Fisher yates shuffle:
   for k := Count - 1 downto 1 do
   begin
      swapIdx := Rand.RandInt(k + 1);
      buf := Result[k];
      Result[k] := Result[swapIdx];
      Result[swapIdx] := buf;
   end;
end;
// Creates a new, EMPTY list of the same runtime class with the same random
// algorithm setting. The examples themselves are not copied.
function TCustomLearnerExampleList.CloneBase: TCustomLearnerExampleList;
begin
Result := TCustomLearnerExampleListClass(Self.ClassType).Create;
Result.fRandomAlg := fRandomAlg;
end;
{ TCustomWeightedLearner }
// Indexed quicksort: reorders Idx (in place) so that Values[Idx[k]] is
// ascending. Values itself is never modified. L and R are the inclusive
// bounds of the partition to sort.
procedure TCustomWeightedLearner.IdxQuickSort(const Values : TDoubleDynArray; var Idx : TIntegerDynArray; L, R : integer);
var I, J: Integer;
T: integer;
P : double;
begin
// indexed quick sort implementation for double values
repeat
I := L;
J := R;
// pivot: value of the middle element of the current partition
P := values[Idx[(L + r) shr 1]];
repeat
while values[Idx[i]] < P do
Inc(I);
while values[Idx[j]] > P do
Dec(J);
if I <= J then
begin
// swap the index entries (not the values)
T := Idx[I];
Idx[I] := Idx[J];
Idx[J] := T;
Inc(I);
Dec(J);
end;
until I > J;
// recurse into the left part, loop on the right part (less stack usage)
if L < J then
IdxQuickSort(Values, Idx, L, J);
L := I;
until I >= R;
end;
// Returns the permutation that sorts feature featureIdx ascending over the
// whole data set: Result[0] indexes the example with the smallest value.
function TCustomWeightedLearner.CalcSortIdx(featureIdx: integer): TIntegerDynArray;
var featVals : TDoubleDynArray;
    i : integer;
begin
   assert(Assigned(DataSet) and (DataSet.Count > 0), 'Error no data set assigned');
   assert(featureIdx < DataSet[0].FeatureVec.FeatureVecLen, 'Feature Index out of bounds');

   SetLength(Result, DataSet.Count);
   SetLength(featVals, DataSet.Count);

   // initialize with the identity permutation and fetch the feature column
   for i := 0 to High(featVals) do
   begin
      Result[i] := i;
      featVals[i] := DataSet[i].FeatureVec[featureIdx];
   end;

   IdxQuickSort(featVals, Result, 0, High(featVals));
end;
// Returns the permutation that sorts feature featureIdx ascending over the
// subset selected by dataSetIdx. Note: the returned indices are positions
// INTO dataSetIdx (0..Length(dataSetIdx)-1), not data set indices.
function TCustomWeightedLearner.CalcSortIdx(const dataSetIdx: TIntegerDynArray;
featureIdx: integer): TIntegerDynArray;
var values : TDoubleDynArray;
j : integer;
begin
assert(Assigned(DataSet) and (DataSet.Count >= Length(dataSetIdx)), 'Error no data set assigned');
assert(featureIdx < DataSet[0].FeatureVec.FeatureVecLen, 'Feature Index out of bounds');
SetLength(Result, Length(dataSetIdx));
SetLength(values, Length(dataSetIdx));
// fast path: matrix backed lists allow direct element access
if DataSet is TMatrixLearnerExampleList then
begin
for j := 0 to Length(dataSetIdx) - 1 do
begin
Result[j] := j;
values[j] := TMatrixLearnerExampleList(DataSet).Matrix[dataSetIdx[j], featureIdx];
end;
end
else
begin
// generic path via the feature vector interface
for j := 0 to Length(dataSetIdx) - 1 do
begin
Result[j] := j;
values[j] := DataSet[dataSetIdx[j]].FeatureVec[featureIdx];
end;
end;
IdxQuickSort(values, Result, 0, Length(dataSetIdx) - 1);
end;
// Indexed counting sort (stable): fills Idx such that Values[Idx[k]] is
// ascending. Min/Max are the value bounds of Values; Idx must already be
// sized to Length(Values) by the caller.
procedure TCustomWeightedLearner.IdxCountSort(const Values : TIntegerDynArray; var Idx : TIntegerDynArray; Min,Max : integer);
Var count: Array of integer;
I, n : integer;
begin
SetLength(count, max - min + 1);
n := Length(Values);
// histogram of the (min shifted) values
for I := 0 to (max - min) do count[I] := 0;
for I := 0 to (n - 1) do
count[Values[I] - min] := count[Values[I] - min] + 1;
// compute the total
// prefix sums -> count[v] then holds the LAST output slot of value v
Count[0] := Count[0]-1; //make sure it starts at zero
for I := 1 to (max - min) do
count[I] := Count[I-1]+Count[i];
// place the indices back to front so the sort stays stable
for I := n-1 downto 0 do
begin
Idx[Count[Values[i]-min]] := I;
Dec(Count[Values[i]-min]);
end;
end;
// Returns the permutation that sorts the (rounded, integer valued) feature
// featureIdx ascending over the subset selected by dataSetIdx, using an
// O(n + range) counting sort. Intended for features with small value ranges.
// The returned indices are positions INTO dataSetIdx.
function TCustomWeightedLearner.CountSortIdx(const dataSetIdx: TIntegerDynArray;
featureIdx : integer): TIntegerDynArray;
var values : TIntegerDynArray;
    j : integer;
    minVal, maxVal : integer;
begin
   assert(Assigned(DataSet) and (DataSet.Count >= Length(dataSetIdx)), 'Error no data set assigned');
   assert(featureIdx < DataSet[0].FeatureVec.FeatureVecLen, 'Feature Index out of bounds');

   SetLength(Result, Length(dataSetIdx));

   // guard: with no elements minVal/maxVal would stay at their sentinels and
   // IdxCountSort would call SetLength with a negative count range
   if Length(dataSetIdx) = 0 then
      exit;

   SetLength(values, Length(dataSetIdx));
   minVal := MaxInt;
   maxval := -MaxInt;

   // fetch the feature values; matrix backed lists allow direct access
   if DataSet is TMatrixLearnerExampleList then
   begin
        for j := 0 to Length(dataSetIdx) - 1 do
        begin
             values[j] := Round(TMatrixLearnerExampleList(DataSet).Matrix[dataSetIdx[j], featureIdx]);
             minVal := min(minVal, values[j]);
             maxVal := max(maxVal, values[j]);
        end;
   end
   else
   begin
        for j := 0 to Length(dataSetIdx) - 1 do
        begin
             Values[j] := Round(DataSet[DataSetIdx[j]].FeatureVec[FeatureIdx]);
             minVal := min(minVal, values[j]);
             maxVal := max(maxVal, values[j]);
        end;
   end;

   IdxCountSort(values, Result, MinVal, MaxVal);
end;
// Returns the distinct class labels present in the data set, in order of
// first occurrence.
function TCustomWeightedLearner.Classes: TIntegerDynArray;
var counter, clsCnt : integer;
found : boolean;
actClass : integer;
numClasses : integer;
begin
SetLength(Result, 10);
numClasses := 0;
// #########################################################
// #### store class indices in the output array
for counter := 0 to DataSet.Count - 1 do
begin
found := False;
actClass := DataSet[counter].ClassVal;
// linear scan over the classes collected so far
for clsCnt := 0 to numClasses - 1 do
begin
if actClass = Result[clsCnt] then
begin
found := True;
break;
end;
end;
if not Found then
begin
// grow the buffer (doubling) before appending the new class
if Length(Result) - 1 <= numClasses then
SetLength(Result, Length(Result)*2);
Result[numClasses] := actClass;
inc(numClasses);
end;
end;
// shrink to the exact number of distinct classes
SetLength(Result, numClasses);
end;
// Builds, per distinct class, the list of example indices belonging to it.
// Returns the number of distinct classes; classes[k] holds the label and
// Idx[k] the exactly-sized index list of class k.
function TCustomWeightedLearner.IndexOfClasses(var Idx: TIntIntArray;
var classes: TIntegerDynArray): integer;
var counter, clsCnt : integer;
found : boolean;
actClass : integer;
i: Integer;
numItems : TIntegerDynArray;
begin
Result := 0;
SetLength(classes, 10);
SetLength(Idx, 10);
SetLength(numItems, 10);
// #########################################################
// #### store example indexes - and the count in the first index
for counter := 0 to DataSet.Count - 1 do
begin
found := False;
actClass := DataSet[counter].ClassVal;
// known class -> append the example index to its list
for clsCnt := 0 to Result - 1 do
begin
if actClass = classes[clsCnt] then
begin
found := True;
inc(numItems[clsCnt]);
if (Length(Idx[clsCnt]) <= numItems[clscnt]) then
SetLength(Idx[clsCnt], 20 + Length(Idx[clsCnt]));
Idx[clsCnt][numItems[clsCnt] - 1] := counter;
break;
end;
end;
if not Found then
begin
// new class -> grow the per class arrays when necessary
// NOTE(review): numItems is sized with Length(classes)*2 AFTER classes was
// already doubled, so it grows larger than needed - harmless but inconsistent.
if Length(Idx) - 1 <= Result then
begin
SetLength(idx, Length(idx)*2);
SetLength(classes, Length(classes)*2);
SetLength(numItems, Length(classes)*2);
numItems[Result] := 0;
end;
SetLength(idx[Result], 10);
idx[Result][0] := counter;
classes[Result] := actClass;
inc(numItems[Result]);
inc(Result);
end;
end;
// shrink all arrays to their exact sizes
SetLength(idx, Result);
for i := 0 to Result - 1 do
SetLength(idx[i], numItems[i]);
SetLength(classes, Result);
end;
// Stores a reference to the data set used by subsequent Learn calls.
// The learner does NOT take ownership of the list.
procedure TCustomWeightedLearner.Init(aDataSet: TCustomLearnerExampleList);
begin
fDataSet := aDataSet;
end;
// Learns a classifier using per example weights. Init must have been called
// before; the weight array length must match the number of examples.
function TCustomWeightedLearner.Learn(const weights: array of double): TCustomClassifier;
begin
   assert(Assigned(fDataSet), 'Error, call init before learn');

   if Length(weights) <> fDataSet.Count then
      raise Exception.Create('Number of weights differs from the number of examples');

   Result := DoLearn(weights);
end;
// Learns without explicit weighting: every example gets the uniform
// weight 1/N before DoLearn is invoked.
function TCustomWeightedLearner.Learn: TCustomClassifier;
var uniform : TDoubleDynArray;
    k : integer;
begin
   assert(Assigned(fDataSet), 'Error, call init before learn');

   SetLength(uniform, fDataSet.Count);
   for k := 0 to High(uniform) do
      uniform[k] := 1/Length(uniform);

   Result := DoLearn(uniform);
end;
{ TCustomLearner }
// Maps the weighted learning interface onto an unweighted learner: the
// examples are duplicated according to their weights (BuildWeightedList),
// DoUnweightedLearn runs on the temporary list, and the original data set
// is restored afterwards.
function TCustomLearner.DoLearn(
const weights: array of double): TCustomClassifier;
begin
// build a new list and use it like the original one
BuildWeightedList(weights);
try
Result := DoUnweightedLearn;
finally
// cleanup the intermediate dataset
// fOrigDataSet is only assigned when BuildWeightedList actually swapped lists
if Assigned(fOrigDataSet) then
begin
fDataSet.Free;
fDataSet := fOrigDataSet;
fOrigDataSet := nil;
end;
end;
end;
// Builds a temporary data set in which each example is duplicated roughly
// proportionally to its weight (see the algorithm description in the class
// declaration). If the weights are uniform the routine does nothing and
// fOrigDataSet stays nil.
procedure TCustomLearner.BuildWeightedList(const weights: array of double);
var maxWeight : double;
counter: Integer;
minAllowedWeight : double;
minWeight : double;
numExmpl : integer;
i : integer;
exmpl : TCustomLearnerExample;
begin
// search for the maximum:
if length(weights) = 0 then
exit;
maxWeight := weights[0];
for counter := 1 to Length(weights) - 1 do
maxWeight := max(maxWeight, weights[counter]);
// weights below 1% of the maximum are discarded entirely
minAllowedWeight := maxWeight/100;
// find minimum
// (smallest weight that is still above the cut off threshold)
minWeight := maxWeight;
for counter := 0 to Length(weights) - 1 do
begin
if (weights[counter] < minWeight) and (weights[counter] >= minAllowedWeight) then
minWeight := weights[counter];
end;
// check if evenly distributed - if so do nothing
if minWeight = maxWeight then
exit;
// swap in a fresh list; DoLearn restores fDataSet when finished
fOrigDataSet := fDataSet;
fDataSet := fOrigDataSet.CloneBase;
// note: redundant - CloneBase already copies fRandomAlg
fDataSet.fRandomAlg := fOrigDataSet.fRandomAlg;
// build resulting example list and multiple add the examples according to the weighting
for counter := 0 to Length(weights) - 1 do
begin
// remove all examples lower than the given weight
if weights[counter] < minWeight
then
continue
else
begin
// each surviving example is added ceil(weight/minWeight) times
numExmpl := ceil(weights[counter]/minWeight);
// clone and add examples
for i := 0 to numExmpl - 1 do
begin
exmpl := TCustomLearnerExample(fOrigDataSet.GetExample(counter).Clone);
fDataSet.Add(exmpl);
end;
end;
end;
end;
{ TCustomExample }
constructor TCustomExample.Create(aFeatureVec: TCustomFeatureList; ownsFeatureVec : boolean);
begin
// when ownsFeatureVec is True the feature list is freed in Destroy
fFeatureVec := aFeatureVec;
fOwnsFeature := ownsFeatureVec;
end;
destructor TCustomExample.Destroy;
begin
   // only free the feature vector when this example was created as its owner
   if fOwnsFeature then
      FreeAndNil(fFeatureVec);

   inherited Destroy;
end;
{ TCustomClassifier }
// Convenience overload: classify an example and discard the confidence.
function TCustomClassifier.Classify(Example: TCustomExample): integer;
var dummyConf : double;
begin
   Result := Classify(Example, dummyConf);
end;
{ TCustomLearnerExample }
// Creates a copy of the same runtime class with the same class label.
// Note: the clone SHARES the feature vector (ownsFeatureVec = False), so
// the original example must outlive its clones.
function TCustomLearnerExample.Clone: TCustomLearnerExample;
begin
Result := TCustomLearnerExampleClass(self.ClassType).Create(fFeatureVec, False);
Result.fClassVal := fClassVal;
end;
end.