-
Notifications
You must be signed in to change notification settings - Fork 22
/
File.inc
1304 lines (1234 loc) · 38.9 KB
/
File.inc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<?php
/**
* @file
*
* Helper functions for file processing.
*
* Notes:
 * Beware of fseek(): it can move the file pointer to the end of the file or beyond it, and feof() won't then return the
 * correct value, because feof() only becomes TRUE after an attempt to read past the end of the file.
*
*
* End of line (EOL) sequences
* Windows end of line sequence: \r\n
* Unix end of line sequence: \n
* Mac end of line sequence: \r
*
* The class implementations are correct but very very slow
*/
/**
 * Object oriented file processing.
 *
 * Wraps a file handle, exposes a few PHP filesystem functions as methods (see
 * __call()), and adds "peek" helpers that read without moving the file pointer.
 */
class File {

  /**
   * File pointer resource.
   *
   * @var resource
   */
  protected $handle;

  /**
   * LIFO stack of saved file pointer positions.
   *
   * Frequently we need to save the current pointer, do some processing and then
   * return to the old position; push() and pop() use this stack to do that.
   *
   * @var array
   */
  protected $history = array();

  /**
   * Creates a File instance.
   *
   * Note that the $handle transfers ownership to this class, and should not be
   * closed outside of this class.
   *
   * @param resource $handle
   *   A valid file handle as returned by fopen().
   *
   * @throws InvalidArgumentException
   *   If the $handle provided is not a resource (this includes the FALSE that
   *   a failed fopen() returns).
   */
  public function __construct($handle) {
    if (!is_resource($handle)) {
      throw new InvalidArgumentException(__CLASS__ . ' invalid file handle given.');
    }
    $this->handle = $handle;
  }

  /**
   * Clean up the allocated file for this class.
   */
  public function __destruct() {
    // Only close handles that are still open; the handle is unset when the
    // constructor threw, and is no longer a resource once it has been closed.
    if (is_resource($this->handle)) {
      fclose($this->handle);
    }
  }

  /**
   * Gets dynamic variables for this class.
   *
   * "pos":
   *   The position of the file pointer as returned by ftell(); i.e., its
   *   offset into the file stream. FALSE if ftell() fails.
   * "size":
   *   The 'size' entry of the fstat() result for the handle.
   *
   * @param string $name
   *   The name of the dynamic variable.
   *
   * @return mixed
   *   The value of the requested variable.
   *
   * @throws InvalidArgumentException
   *   If $name is not one of the supported dynamic variables.
   */
  public function __get($name) {
    switch ($name) {
      case 'pos':
        return ftell($this->handle);

      case 'size':
        $stats = $this->stat();
        return $stats['size'];
    }
    throw new InvalidArgumentException("$name isn't a property of " . __CLASS__);
  }

  /**
   * Passes calls though to PHP filesystem functions using the file handle.
   *
   * Supported names are 'rewind', 'fgetc', 'fgets' and 'fstat', plus the
   * aliases 'getc', 'gets', 'read' (fread) and 'stat'.
   *
   * @param string $name
   *   The function name.
   * @param array $arguments
   *   The arguments to the given function, without the file handle.
   *
   * @return mixed
   *   Depends on the function called @see the PHP file system docs
   *   http://uk.php.net/manual/en/ref.filesystem.php.
   *
   * @throws InvalidArgumentException
   *   If $name is neither a supported function nor a supported alias.
   */
  public function __call($name, $arguments) {
    $functions = array('rewind', 'fgetc', 'fgets', 'fstat');
    // Alias for PHP filesystem functions, for cleaner looks.
    $aliases = array(
      'getc' => 'fgetc',
      'gets' => 'fgets',
      'read' => 'fread',
      'stat' => 'fstat',
    );
    $exists = in_array($name, $functions, TRUE);
    $aliased = array_key_exists($name, $aliases);
    if ($exists || $aliased) {
      // Handle is always the first parameter.
      array_unshift($arguments, $this->handle);
      return call_user_func_array($aliased ? $aliases[$name] : $name, $arguments);
    }
    throw new InvalidArgumentException("$name isn't a method of " . __CLASS__);
  }

  /**
   * Sets the position of the file pointer.
   *
   * @param int $offset
   *   An offset in bytes to move the pointer from the specified $whence value.
   *   Can be positive or negative. NULL is treated as 0.
   * @param int $whence
   *   The context in which to evaluate the given $offset. The accepted values
   *   are:
   *   SEEK_SET - Set position equal to offset bytes.
   *   SEEK_CUR - Set position to current location plus offset.
   *   SEEK_END - Set position to end-of-file plus offset.
   * @param boolean $eof
   *   Allow this function to seek past the EOF. When FALSE and the pointer
   *   ends up where EOF() is TRUE, the pointer is placed at the end of the
   *   file and FALSE is returned.
   *
   * @return boolean
   *   TRUE if the seek succeeded, FALSE otherwise.
   */
  public function seek($offset = NULL, $whence = SEEK_SET, $eof = TRUE) {
    // Cast so that the NULL default is never handed to fseek() directly.
    $ret = fseek($this->handle, (int) $offset, $whence) == 0;
    if (!$eof && $this->EOF()) {
      fseek($this->handle, 0, SEEK_END);
      return FALSE;
    }
    return $ret;
  }

  /**
   * Checks to see if the file pointer is at the beginning of the file.
   *
   * @return boolean
   *   TRUE if this position is at the start of the file, FALSE otherwise.
   */
  public function start() {
    return $this->pos == 0;
  }

  /**
   * Checks if the file pointer is on the EOF.
   *
   * feof() does not return TRUE just because the file pointer was moved to
   * the EOF; it requires an attempted read at the EOF. So instead of feof()
   * this peeks at the current character.
   *
   * @return boolean
   *   TRUE if no character can be read at the file pointer, FALSE otherwise.
   */
  public function EOF() {
    return $this->peekc() === FALSE;
  }

  /**
   * Peeks at the current character.
   *
   * @return string
   *   The single character the file pointer is currently pointing at. FALSE
   *   when the pointer is at the EOF.
   */
  public function peekc() {
    $this->push();
    $c = fgetc($this->handle);
    $this->pop();
    return $c;
  }

  /**
   * Peeks at the current line.
   *
   * Reading ends when length - 1 bytes have been read, on a newline (which is
   * included in the return value), or on EOF (whichever comes first). If no
   * length is specified, it will keep reading from the stream until it reaches
   * the end of the line.
   *
   * @param int $length
   *   The max number of bytes to read from the current line. Values that are
   *   not greater than 0 mean "no limit".
   *
   * @return string
   *   The current line up to the given $length - 1 or the last EOL character
   *   encountered, or FALSE if EOF.
   */
  public function peeks($length = 0) {
    $this->push();
    // fgets() must not be given a zero length, so only pass it when set.
    $s = $length > 0 ? fgets($this->handle, $length) : fgets($this->handle);
    $this->pop();
    return $s;
  }

  /**
   * Peeks $length bytes from $offset from the file pointer position.
   *
   * @param int $offset
   *   The offset, relative to the current position, to move the file pointer
   *   before reading.
   * @param int $length
   *   The max number of bytes to peek.
   * @param boolean $eof
   *   Allow this function to seek past the EOF.
   *
   * @return string
   *   The peeked bytes.
   */
  public function peek($offset, $length, $eof = TRUE) {
    $this->push();
    $this->seek($offset, SEEK_CUR, $eof);
    $ret = $this->read($length);
    $this->pop();
    return $ret;
  }

  /**
   * Pushes the current position onto the stack.
   */
  protected function push() {
    $this->history[] = $this->pos;
  }

  /**
   * Pops the last position off the stack and seeks to it.
   *
   * Does nothing when the stack is empty.
   */
  protected function pop() {
    if (!empty($this->history)) {
      $this->seek(array_pop($this->history));
    }
  }

}
/**
 * Extends the File class to recognize line endings.
 */
class TextFile extends File {

  /**
   * Supported line ending formats.
   *
   * End of line (EOL) sequences
   *  Unix: "\n"   0x0a
   *  DOS:  "\r\n" 0x0d0a
   *  MAC:  "\r"   0x0d
   *
   * These are plain identifiers, not bit flags (MAC == UNIX | DOS), so they
   * must be compared with ==, never tested with &.
   */
  const UNIX = 1;
  const DOS = 2;
  const MAC = 3;

  /**
   * The valid line ending formats.
   *
   * @var array
   */
  static protected $formats = array(self::UNIX, self::DOS, self::MAC);

  /**
   * Maps each line ending format to its EOL sequence.
   *
   * @var array
   */
  static protected $endings = array(self::UNIX => "\n", self::DOS => "\r\n", self::MAC => "\r");

  /**
   * The expected line ending format.
   *
   * @var int
   */
  protected $format;

  /**
   * The expected line ending value.
   *
   * @var string
   */
  protected $ending;

  /**
   * Creates a TextFile instance.
   *
   * @param resource $handle
   *   A valid file handle as returned by fopen().
   * @param int $format
   *   A valid line ending format for this file. When NULL the result of
   *   detectFormat() is used.
   *
   * @throws InvalidArgumentException
   *   If the $handle or $format provided is not valid.
   */
  public function __construct($handle, $format = NULL) {
    parent::__construct($handle);
    $format = isset($format) ? $format : $this->detectFormat();
    if (array_search($format, self::$formats) === FALSE) {
      // Plain message, like the parent class: no dependency on a translation
      // function just to report a programming error.
      throw new InvalidArgumentException('Invalid format given for ' . __CLASS__);
    }
    $this->format = $format;
    $this->ending = self::$endings[$format];
  }

  /**
   * Guesses the line ending format of the file.
   *
   * @todo Implement; currently no detection is done.
   *
   * @return int
   *   Always self::UNIX for now.
   */
  public function detectFormat() {
    return self::UNIX;
  }

  /**
   * Checks if the file pointer is on an EOL character of the file's format.
   *
   * End of line (EOL) sequences
   *  Windows end of line sequence:  "\r\n"
   *  Unix end of line sequence: "\n"
   *  Mac end of line sequence: "\r"
   *
   * For the DOS format the pointer counts as being on the EOL when it is on
   * either the "\r" or the "\n" of a "\r\n" pair.
   *
   * @return boolean
   *   TRUE if the file pointer is on an EOL character, FALSE otherwise.
   */
  public function EOL() {
    switch ($this->format) {
      case self::UNIX:
        return $this->peekc() === "\n";

      case self::DOS:
        // Either on the "\r" (pair starts here) or on the "\n" (pair started
        // one byte earlier).
        return $this->peek(0, 2) === "\r\n" || $this->peek(-1, 2) === "\r\n";

      case self::MAC:
        return $this->peekc() === "\r";
    }
    return FALSE;
  }

  /**
   * If the file pointer is on an EOL sequence move it to the last character of that sequence.
   *
   * Really only needed for multibyte line endings such as DOS.
   *
   * @return int
   *   The current position.
   */
  public function seekLastEOL() {
    if ($this->format == self::DOS && $this->EOL()) {
      if ($this->peekc() === "\r") {
        $this->seek(1, SEEK_CUR);
      }
    }
    return $this->pos;
  }

  /**
   * If the file pointer is on an EOL character(s) move it in front of the EOL character(s).
   *
   * If the seek would move before the start of the file the pointer is left
   * where it is.
   *
   * @return int
   *   The current position.
   */
  public function seekBeforeEOL() {
    if ($this->EOL()) {
      // DOS is the only two character EOL: when sitting on its second
      // character ("\n") we have to step over the "\r" as well.
      $move = ($this->format == self::DOS && $this->peekc() === "\n") ? 2 : 1;
      $this->seek(-$move, SEEK_CUR);
    }
    return $this->pos;
  }

  /**
   * If the file pointer is on an EOL character(s) move it past the EOL character(s).
   *
   * Only runs once in that if you have multiple lines with only EOL characters
   * on them this will only move forward one line.
   *
   * @return int
   *   The current position.
   */
  public function seekAfterEOL() {
    if ($this->EOL()) {
      // DOS is the only two character EOL: when sitting on its first
      // character ("\r") we have to step over the "\n" as well.
      $move = ($this->format == self::DOS && $this->peekc() === "\r") ? 2 : 1;
      // Don't allow this function to go past the EOF.
      $this->seek($move, SEEK_CUR, FALSE);
    }
    return $this->pos;
  }

  /**
   * Moves the pointer to the start of the line which it is currently on.
   *
   * If we are to think of the file as a single stream of characters, going
   * left to right, the start of the line is the leftmost character, including
   * the current position, that is not the previous line's EOL character. If
   * there is no previous line then it is position 0.
   *
   * @return int
   *   The current position.
   */
  public function seekLineStart() {
    if ($this->EOF()) {
      // Make sure the pointer isn't past the EOF.
      $this->seek(0, SEEK_END);
    }
    if ($this->start()) {
      // Position 0 is the start of the first line, even when the first line
      // consists of nothing but an EOL sequence.
      return $this->pos;
    }
    // If we are on the EOL character for our line move in front of it.
    $this->seekBeforeEOL();
    /**
     * Now on a non-EOL character of this line, or in the case where this line
     * is only an EOL character(s), on the previous line's EOL character.
     *
     * Note that this could be sped up by reading large chunks of the file and
     * then processing them but this is easier/safer for the moment.
     */
    do {
      if ($this->EOL()) {
        // We are on the previous line, move back to our line.
        $this->seekAfterEOL();
        break;
      }
      // seek() fails once we try to move before position 0, ending the loop.
    } while ($this->seek(-1, SEEK_CUR));
    return $this->pos;
  }

  /**
   * Moves the pointer to the end of the line which it is currently on.
   *
   * If we are to think of the file as a single stream of characters, going
   * left to right, the end of the line is the last character of the EOL
   * sequence that terminates the line.
   *
   * Exceptional cases:
   *  If there is no EOL on the current line, only the EOF, the pointer is
   *  moved to the EOF position.
   *  If the file pointer is past the EOF it is returned to the EOF.
   *
   * End of line (EOL) sequences
   *  Windows end of line sequence:  "\r\n"
   *  Unix end of line sequence: "\n"
   *  Mac end of line sequence: "\r"
   *
   * @return int
   *   The current position.
   */
  public function seekLineEnd() {
    if ($this->EOF()) {
      // Make sure the pointer isn't past the EOF.
      $this->seek(0, SEEK_END);
    }
    /**
     * Note that this could be sped up by reading large chunks of the file and
     * then processing them but this is easier/safer for the moment.
     */
    do {
      if ($this->EOL()) {
        $this->seekLastEOL();
        break;
      }
      // seek() with $eof = FALSE returns FALSE on reaching the EOF.
    } while ($this->seek(1, SEEK_CUR, FALSE));
    return $this->pos;
  }

  /**
   * Seeks to the end of the previous line.
   *
   * Exceptional cases:
   *  If the file pointer is on the first line it will be moved to position 0.
   *
   * @return int
   *   The current position.
   */
  public function seekPrevLineEnd() {
    $this->seekLineStart();
    // Move onto the previous line's EOL; if the position is 0 the seek fails
    // and nothing happens.
    $this->seek(-1, SEEK_CUR);
    return $this->pos;
  }

  /**
   * Seeks to the beginning of the previous line.
   *
   * @return int
   *   The current position.
   */
  public function seekPrevLineStart() {
    $this->seekPrevLineEnd();
    return $this->seekLineStart();
  }

  /**
   * Seeks to the beginning of the next line.
   *
   * Exceptional cases:
   *  If the current line is terminated by the EOF the pointer stays on the EOF.
   *
   * @return int
   *   The current position.
   */
  public function seekNextLineStart() {
    $this->seekLineEnd();
    if (!$this->EOF()) {
      // Step off the last EOL character onto the next line.
      $this->seek(1, SEEK_CUR);
    }
    return $this->pos;
  }

  /**
   * Seeks to the end of the next line.
   *
   * @return int
   *   The current position.
   */
  public function seekNextLineEnd() {
    $this->seekNextLineStart();
    return $this->seekLineEnd();
  }

  /**
   * Sets the position of the file pointer at the start of the line defined by offset from $whence.
   *
   * Will not move the pointer past the start/end of the file.
   *
   * @param int $offset
   *   An offset in lines to move the pointer from the specified $whence value.
   *   Can be positive (towards the end) or negative (towards the start).
   * @param int $whence
   *   The context in which to evaluate the given $offset. The accepted values
   *   are:
   *   SEEK_SET - Count lines from the start of the file.
   *   SEEK_CUR - Count lines from the current line.
   *   SEEK_END - Count lines from the end of the file.
   *
   * @return boolean
   *   FALSE if the initial seek to $whence failed, or if the pointer stopped
   *   moving before $offset lines had been traversed. TRUE otherwise.
   */
  public function seekLine($offset, $whence = SEEK_SET) {
    $success = $this->seek(0, $whence);
    $this->seekLineStart();
    $forward = $offset >= 0;
    $steps = abs($offset);
    for ($i = 0; $i < $steps; $i++) {
      $before = $this->pos;
      $forward ? $this->seekNextLineStart() : $this->seekPrevLineStart();
      if ($this->pos == $before) {
        // No progress: we ran into the start/end of the file.
        $success = FALSE;
        break;
      }
    }
    return $success;
  }

  /**
   * Similar to fgets() but splits on this file's line ending, which is not included in the result.
   *
   * fgets() is significantly faster but this is only noticeable on large
   * files with 10,000 or more lines.
   *
   * @return string
   *   The text from the file pointer up to, but excluding, the next line
   *   ending; the pointer is left after that line ending. When no line ending
   *   remains the rest of the file is returned. FALSE if nothing could be read.
   */
  public function getLine() {
    if (feof($this->handle)) {
      return FALSE;
    }
    $start = ftell($this->handle);
    $buffer = '';
    $offset = 0;
    while (!feof($this->handle)) {
      $buffer .= fread($this->handle, 128);
      if (($pos = strpos($buffer, $this->ending, $offset)) !== FALSE) {
        fseek($this->handle, $start + $pos + strlen($this->ending), SEEK_SET);
        return substr($buffer, 0, $pos);
      }
      /**
       * If it didn't match, maybe the first character of the ending was the
       * last character read; an ending is at most 2 characters, so rescan from
       * there. Never let the offset go negative on a short buffer.
       */
      $offset = max(0, strlen($buffer) - 2);
    }
    return strlen($buffer) == 0 ? FALSE : $buffer;
  }

}
/**
 * Extends TextFile for processing CSV. Allows multiple delimiters and multicharacter delimiters.
 *
 * Fields are defined as the string of characters between any delimited values
 * and/or the start/end of the file.
 *
 * Unlike lines, fields do not include their delimiter.
 */
class DelimitedFile extends TextFile {

  /**
   * The delimiters that separate fields.
   *
   * Always contains the file's line ending as well.
   *
   * @var array
   */
  protected $delimiters;

  /**
   * A PREG pattern for matching delimiters.
   *
   * @var string
   */
  protected $pattern;

  /**
   * Creates a DelimitedFile instance.
   *
   * @param resource $handle
   *   A valid file handle as returned by fopen().
   * @param int $format
   *   A valid line ending format for this file.
   * @param mixed $delimiters
   *   A single delimiter or a collection of delimiters that can be any number
   *   of characters.
   *
   * @throws InvalidArgumentException
   *   If the $handle or $format provided is not valid.
   */
  public function __construct($handle, $format = self::UNIX, $delimiters = ',') {
    parent::__construct($handle, $format);
    $this->delimiters = is_array($delimiters) ? $delimiters : array($delimiters);
    // The line ending always separates fields too.
    if (!in_array($this->ending, $this->delimiters, TRUE)) {
      $this->delimiters[] = $this->ending;
    }
    $subpatterns = array();
    foreach ($this->delimiters as $delimiter) {
      // Quote for use between '/' so a delimiter containing '/' stays literal.
      $subpatterns[] = '(' . preg_quote($delimiter, '/') . ')';
    }
    $this->pattern = '/' . implode('|', $subpatterns) . '/';
  }

  /**
   * Finds the delimiter, if any, that the file pointer is currently inside.
   *
   * Warning: delimiters that contain the same character more than once are
   * not supported, because strpos() only reports the first occurrence.
   *
   * The file pointer is not moved.
   *
   * @return int
   *   The number of bytes from the file pointer to the first byte after the
   *   delimiter, or FALSE if the pointer is not on a delimiter (or at EOF).
   */
  private function delimiterAtPointer() {
    $c = $this->peekc();
    if ($c === FALSE) {
      return FALSE;
    }
    foreach ($this->delimiters as $delimiter) {
      $offset = strpos($delimiter, $c);
      if ($offset === FALSE) {
        continue;
      }
      $length = strlen($delimiter);
      $this->push();
      // Move to the expected start of the delimiter; if that lies before the
      // start of the file the delimiter cannot be here.
      $read = $this->seek(-$offset, SEEK_CUR) ? $this->read($length) : FALSE;
      $this->pop();
      if ($read === $delimiter) {
        return $length - $offset;
      }
    }
    return FALSE;
  }

  /**
   * Checks if the file pointer is on a delimiter character(s).
   *
   * @return boolean
   *   TRUE if the file pointer is on a delimiter or at the EOF, FALSE
   *   otherwise.
   */
  public function isDelimiterSafe() {
    if ($this->EOF()) {
      // EOF is always a delimiter.
      return TRUE;
    }
    return $this->delimiterAtPointer() !== FALSE;
  }

  /**
   * Checks if the file pointer is on a delimiter character(s).
   *
   * Same check as isDelimiterSafe(); this is the name the field methods of
   * this class call.
   *
   * @return boolean
   *   TRUE if the file pointer is on a delimiter or at the EOF, FALSE
   *   otherwise.
   */
  public function isDelimiter() {
    return $this->isDelimiterSafe();
  }

  /**
   * Moves the pointer after the delimiter if the pointer is currently on one.
   *
   * @return int
   *   The current file pointer position.
   */
  public function seekAfterDelimiter() {
    $skip = $this->delimiterAtPointer();
    if ($skip !== FALSE) {
      $this->seek($skip, SEEK_CUR);
    }
    return $this->pos;
  }

  /**
   * Moves the file pointer to the start of the field it is currently in.
   *
   * Exceptional cases:
   *  If the file pointer is on the first field it will be moved to position 0.
   *
   * @return int
   *   The current file pointer position, or FALSE (without moving) when the
   *   pointer is on a delimiter or at the EOF.
   */
  public function seekFieldStart() {
    if ($this->isDelimiter()) {
      return FALSE;
    }
    // Walk backwards until we step onto the delimiter that precedes this
    // field, then hop back over it.
    while (!$this->start()) {
      if (!$this->seek(-1, SEEK_CUR)) {
        break;
      }
      if ($this->isDelimiter()) {
        $this->seekAfterDelimiter();
        break;
      }
    }
    return $this->pos;
  }

  /**
   * Gets the current delimited field from the point the file pointer is on.
   *
   * Reads up to the next delimiter (or the EOF) and leaves the pointer after
   * that delimiter. If the pointer is on a delimiter the field is the empty
   * string.
   *
   * @return string
   *   The characters read, FALSE if the file pointer is at the EOF.
   */
  public function getField() {
    if ($this->EOF()) {
      // No fields remain.
      return FALSE;
    }
    $ret = '';
    // Get non delimited characters; EOF counts as a delimiter so this ends.
    while (!$this->isDelimiter()) {
      $ret .= $this->getc();
    }
    // Move to start of next field.
    $this->seekAfterDelimiter();
    return $ret;
  }

  /**
   * Gets a number of fields up to max $count if they exist.
   *
   * @param int $count
   *   The max number of fields to read. A negative value reads until no
   *   fields remain.
   *
   * @return array
   *   The requested fields up to a max of $count, FALSE if no field was read.
   */
  public function getFieldsSafe($count) {
    $fields = array();
    while ($count != 0 && ($field = $this->getField()) !== FALSE) {
      $fields[] = $field;
      $count--;
    }
    return empty($fields) ? FALSE : $fields;
  }

  /**
   * Gets all fields of the current line.
   *
   * Unlike other functions in these classes this one has been optimized for
   * speed.
   *
   * fgetcsv() is about twice as fast as this function but this supports
   * multiple and multicharacter delimiters.
   *
   * The speed difference is only noticeable on large files with 10,000 or
   * more lines.
   *
   * @return array
   *   The result of getLine() split on the delimiters, FALSE if getLine()
   *   returned FALSE.
   */
  public function getFields() {
    $line = $this->getLine();
    return $line === FALSE ? FALSE : preg_split($this->pattern, $line);
  }

  /**
   * Replaces $item with its first element.
   *
   * Not called from within this class.
   *
   * @param array $item
   *   The array to collapse, modified in place.
   * @param mixed $key
   *   Unused.
   */
  private function mapMatches(array &$item, $key) {
    $item = $item[0];
  }

}
/**
 * Moves the file pointer to the beginning of the current line.
 *
 * If on the first line moves the pointer to 0.
 *
 * On any other line the pointer will be moved to the character following the
 * previous line's EOL sequence ("\r\n", "\n" or "\r").
 *
 * The EOL is considered part of the line it terminates.
 *
 * @param resource $handle
 *   File handle.
 * @param int $read_size
 *   The amount to read in at a time while searching backwards for an EOL
 *   sequence. Tweak for performance. Values below 1 are treated as 1.
 *
 * @return int
 *   The file position.
 */
function file_line_start($handle, $read_size = 64) {
  /**
   * Ignore the cruft at the end of a file. This is important: without it this
   * function would assume the cruft at the end of the file was a valid line.
   */
  if (file_eof($handle)) {
    file_end($handle);
  }
  // Ignore the EOL that we are currently sitting on as it's part of the
  // current line.
  file_move_before_eol($handle);
  if (ftell($handle) == 0) {
    // Already at the start of a line by definition.
    return 0;
  }
  // A zero or negative section size would never make progress.
  $read_size = max(1, (int) $read_size);
  $found_at = FALSE;
  // Parse the file backwards, one section at a time, looking for an EOL
  // character from the previous line.
  do {
    $pos = ftell($handle);
    $length = min($pos, $read_size);
    $last_section = ($pos - $length) == 0;
    fseek($handle, -$length, SEEK_CUR);
    /**
     * Since we are looking for the character immediately following the EOL it
     * is not important if we get both characters of a Windows EOL.
     */
    $s = fread($handle, $length);
    $carriage_return_pos = strrpos($s, "\r");
    $new_line_pos = strrpos($s, "\n");
    // Take the rightmost EOL character. FALSE means "not found", which must
    // be kept apart from a match at offset 0 of the section.
    if ($carriage_return_pos === FALSE) {
      $found_at = $new_line_pos;
    }
    elseif ($new_line_pos === FALSE) {
      $found_at = $carriage_return_pos;
    }
    else {
      $found_at = max($carriage_return_pos, $new_line_pos);
    }
    // Back to the start of this section, ready for the next (earlier) one.
    fseek($handle, -$length, SEEK_CUR);
  } while ($found_at === FALSE && !$last_section);
  if ($found_at !== FALSE) {
    // Move onto the EOL character, then to the character following the EOL.
    fseek($handle, $found_at, SEEK_CUR);
    file_move_after_eol($handle);
  }
  // Either the character following the previous line's EOL or the start of
  // the file.
  return ftell($handle);
}
/**
 * Moves the file pointer to the end of the current line, or the end of the file if an EOL sequence is not found.
 *
 * Beware of cases where the EOF is preceded by an EOL sequence; in these
 * cases the file pointer will be moved to the EOL sequence.
 *
 * feof() will not work immediately after this function is called. Use
 * file_eof() instead.
 *
 * @param resource $handle
 *   File handle.
 * @param int $read_size
 *   The amount to read in at a time while searching for an EOL sequence.
 *   Tweak for performance.
 *
 * @return int
 *   The file position.
 */
function file_line_end($handle, $read_size = 64) {
  if (file_eof($handle)) {
    // At the EOF: ignore the cruft and return the EOF position.
    file_end($handle);
    return ftell($handle);
  }
  if (file_eol($handle)) {
    // At the EOL: return the position of the last character in the EOL
    // sequence.
    return file_move_end_of_eol($handle);
  }
  $found_at = FALSE;
  $length = 0;
  // Parse the file forwards, one section at a time, looking for an EOL
  // character.
  do {
    $remaining = file_tell_eof($handle) - ftell($handle);
    $length = min($remaining, $read_size);
    if ($length <= 0) {
      // Nothing left to read (or an unusable $read_size).
      break;
    }
    $last_section = ($remaining - $length) == 0;
    $s = fread($handle, $length);
    $carriage_return_pos = strpos($s, "\r");
    $new_line_pos = strpos($s, "\n");
    // Take the leftmost EOL character. A "not found" result must not take
    // part in min(), otherwise files that only use one of the two characters
    // would never match.
    if ($carriage_return_pos === FALSE) {
      $found_at = $new_line_pos;
    }
    elseif ($new_line_pos === FALSE) {
      $found_at = $carriage_return_pos;
    }
    else {
      $found_at = min($carriage_return_pos, $new_line_pos);
    }
  } while ($found_at === FALSE && !$last_section);
  if ($found_at !== FALSE) {
    // The pointer is at the end of the section just read; move back onto the
    // EOL character, then to the last character of the EOL sequence.
    fseek($handle, -($length - $found_at), SEEK_CUR);
    file_move_end_of_eol($handle);
  }
  return ftell($handle);
}
/**
 * Moves the file pointer to the beginning of the previous line.
 *
 * Moves to the start of the current line, steps back one character with
 * fungetc(), moves in front of any EOL there and then seeks to the start of
 * that line.
 *
 * NOTE(review): an earlier version of this comment claimed the function wraps
 * to the last line when called on the first line, while the code comment
 * states that fungetc() won't wrap the file. fungetc() is defined elsewhere;
 * confirm which is true.
 *
 * feof() will not work immediately after this function is called. Use
 * file_eof() instead.
 *
 * @param resource $handle
 *   File handle.
 *
 * @return int
 *   The file position.
 */
function file_line_prev($handle) {
  // Either at beginning of file or the character following an EOL.
  file_line_start($handle);
  // Move back one character, potentially onto the EOL; won't wrap the file.
  fungetc($handle);
  file_move_before_eol($handle);
  return file_line_start($handle);
}
/**
 * Moves the file pointer to the beginning of the next line.
 *
 * Moves to the end of the current line with file_line_end() and then past the
 * EOL with file_move_after_eol().
 *
 * NOTE(review): an earlier version of this comment claimed the function wraps
 * to the first line when called on the last line; nothing in this function
 * does that itself, so it would have to come from file_move_after_eol(),
 * which is defined elsewhere. Confirm.
 *
 * feof() will not work immediately after this function is called. Use
 * file_eof() instead.
 *
 * @param resource $handle
 *   File handle.
 *
 * @return int
 *   The file position.
 */
function file_line_next($handle) {
  file_line_end($handle);
  file_move_after_eol($handle);
  return ftell($handle);
}
/**
 * Rewinds the file pointer by up to $lines lines.
 *
 * The pointer is first moved to the start of the current line. Unless that is
 * already position 0, file_line_prev() is then called up to $lines times,
 * stopping early as soon as it reports position 0.
 *
 * feof() will not work immediately after this function is called. Use
 * file_eof() instead.
 *
 * @param resource $handle
 *   File handle.
 * @param int $lines
 *   The max number of lines to move backward in the file.
 *
 * @return int
 *   The file position.
 */
function file_move_back($handle, $lines) {
  if (file_line_start($handle) != 0) {
    $moved = 0;
    while ($moved < $lines) {
      $moved++;
      if (file_line_prev($handle) == 0) {
        // Reached the beginning of the file.
        break;
      }
    }
  }
  return ftell($handle);
}
/**
 * Advances the file pointer by up to $lines lines.
 *
 * The pointer is first moved to the start of the current line, then
 * file_line_next() is called up to $lines times. Should file_line_next()
 * report position 0, file_line_prev() is called once and the loop stops.
 *
 * feof() will not work immediately after this function is called. Use
 * file_eof() instead.
 *
 * @param resource $handle
 *   File handle.
 * @param int $lines
 *   The max number of lines to move forward in the file.
 *
 * @return int
 *   The file position.
 */
function file_move_forward($handle, $lines) {
  file_line_start($handle);
  $moved = 0;
  while ($moved < $lines) {
    $moved++;
    if (file_line_next($handle) == 0) {
      // Ended up at position 0: step back a line and stop.
      file_line_prev($handle);
      break;
    }
  }
  return ftell($handle);
}
/**
* Similar to fgetcsv, except it ignores enclosures, as fgetcvs breaks with open ended quotes.
*
* Also the number of expected fields can be given in which case the function will read multiple lines
* until it has gotten the all the required fields. This helps deal with files that don't have properly
* escaped newlines. The newlines will be preserved in the returned values.
*
* Please check too see if fgetcsv works before using this function as its scope is limited, it doesn't deal with
* enclosures or escapes correctly.
*
* Note that it is possible for this function to return more fields than expected.
*
* @param resource $handle
* File handle.
* @param int $length
* The max number of bytes to read from the current line.
* @param string $delimiter
* A single character used to delimit the fields in the csv.
* @param int $expected_fields
* The number of fields expected to be read for a single line.
*
* @return array
* An array containing the values in each field.
*/
function file_get_csv($handle, $length = 0, $delimiter = ',', $expected_fields = NULL) {
$string = (isset($length) && $length > 0) ? fgets($handle, $length) : fgets($handle); // fget will issue a warning if given 0 or NULL for length.
if ($string == FALSE) {
return FALSE;
}
if (isset($expected_fields)) {
$fields = explode($delimiter, $string);