-
Notifications
You must be signed in to change notification settings - Fork 0
/
FindJPG_backup.txt
217 lines (192 loc) · 6.07 KB
/
FindJPG_backup.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
/*
author:zyr
function:find jpg file in a disk
notice:本文件是一个失败的尝试。我曾经试图通过复原文件头中的哈夫曼表来对每个块进行挑战,
如果一个扇区的前8位中存在能解释为当前哈夫曼编码集合中的数据则视其为有效的jpeg文件碎片,
但当时没有考虑到RST1-RST7的影响,故失败,输出函数也存在一定的错误,唯一确定正确的是复原哈夫曼表的函数
*/
#include <cstdint>
#include<set>
#include"FindJPG.h"
#include"BasicFunction.h"
#include"huffmanTree.h"
// Reads the two bytes at buffer[offset] and buffer[offset + 1] and combines
// them into one 16-bit value, first byte in the high half (big-endian order).
// The caller must make sure both bytes are readable.
uint16_t read2Bytes(unsigned char* buffer, unsigned offset)
{
    const unsigned char* bytes = buffer + offset;
    return static_cast<uint16_t>((bytes[0] << 8) | bytes[1]);
}
// Reports whether buffer starts with the SOI marker immediately followed by
// the APP0 marker (both constants come from the project headers).
// Reads the first four bytes of buffer.
bool JFIF_head_found(unsigned char* buffer)
{
    const bool starts_with_soi = read2Bytes(buffer, 0) == SOI;
    return starts_with_soi && read2Bytes(buffer, 2) == APP0;
}
// Reports whether buffer starts with the SOI marker immediately followed by
// the APP1 marker (both constants come from the project headers).
// Reads the first four bytes of buffer.
bool EXIF_head_found(unsigned char* buffer)
{
    const bool starts_with_soi = read2Bytes(buffer, 0) == SOI;
    return starts_with_soi && read2Bytes(buffer, 2) == APP1;
}
// Writes the low n_bits bits of huffman_code into buf as a NUL-terminated
// string of '0'/'1' characters, most significant bit first, and returns buf.
//
// buf has room for 64 chars, i.e. at most 63 digits plus the terminator, so
// n_bits is clamped to [0, 63]: a non-positive width yields an empty string
// instead of an undefined shift, and an oversized width can no longer write
// past the end of buf. Bits above the width of int are rendered as '0'.
static const char* huffman2binary_str(int huffman_code, int n_bits, char buf[64]) {
    if (n_bits < 0) {
        n_bits = 0;
    }
    else if (n_bits > 63) {
        n_bits = 63;
    }
    // Zero-extend into 64 bits so every shift below (at most 62) is well defined.
    const unsigned long long code = static_cast<unsigned int>(huffman_code);
    for (int i = 0; i < n_bits; ++i) {
        const int shift = n_bits - 1 - i;
        buf[i] = ((code >> shift) & 1ULL) ? '1' : '0';
    }
    buf[n_bits] = '\0';
    return buf;
}
// Scans the five sectors starting at sector_begin for the first DHT marker and
// returns the Huffman codes of the table that follows it, each rendered as a
// string of '0'/'1' characters. Every symbol/code pair is also printed.
//
// fp                    - source passed through to ReadSector.
// sector_begin          - first sector to scan.
// max_huffman_code_len  - out: longest code length (in bits) that has at least
//                         one code; 0 when no table was decoded.
//
// Only the first table after the marker is decoded, and only the part of it
// that lies inside the sector holding the marker: decoding stops at the end of
// the sector buffer instead of reading past it, so a table that straddles a
// sector boundary yields a partial (possibly empty) set. A marker whose two
// bytes are split across two sectors is not detected.
set<string> get_huffmanCode_set(FILE* fp, unsigned int sector_begin, unsigned int* max_huffman_code_len)
{
    // Sector-sized scratch buffer, allocated on the first call and reused afterwards.
    static unsigned char* tmp = (unsigned char*)malloc(SECTOR_SIZE * sizeof(unsigned char));
    set<string> huffman_code_set;
    *max_huffman_code_len = 0;
    if (tmp == nullptr)
        return huffman_code_set; // allocation failed: nothing can be scanned

    const long long sector_size = static_cast<long long>(SECTOR_SIZE);
    for (unsigned int begin = sector_begin; begin < sector_begin + 5; begin++) // the DHT may sit a sector or more after the file header
    {
        ReadSector(fp, begin, tmp);
        for (int offset = 0; offset < SECTOR_SIZE - 1; offset++)
        {
            if (read2Bytes(tmp, offset) != DHT)
                continue;

            // After the 2-byte marker: 2-byte segment length, 1-byte table
            // class/id, 16 per-length code counts, then the symbol bytes.
            const long long counts_pos = static_cast<long long>(offset) + 5;
            if (counts_pos + 16 > sector_size)
                return huffman_code_set; // the count bytes run past the sector
            const unsigned char* numbers = tmp + counts_pos;
            long long symbol_pos = counts_pos + 16;

            char buf[64];
            int huffman_code = 0;
            for (unsigned int n_bits = 1; n_bits <= 16; n_bits++) {
                const int num = numbers[n_bits - 1];
                for (int j = 0; j < num; j++) {
                    if (symbol_pos >= sector_size)
                        return huffman_code_set; // the symbol bytes run past the sector
                    const int symbol = tmp[symbol_pos++];
                    if (n_bits > *max_huffman_code_len)
                        *max_huffman_code_len = n_bits;
                    const char* bits = huffman2binary_str(huffman_code, static_cast<int>(n_bits), buf);
                    printf("0x%0.2x | %s\n", symbol, bits);
                    huffman_code_set.insert(bits);
                    huffman_code++;
                }
                // Codes of the next length continue from the current value with one more bit.
                huffman_code <<= 1;
            }
            return huffman_code_set;
        }
    }
    return huffman_code_set;
}
//set<string> create_huffman_lookup_table(set<string> huffman_code_set, unsigned int max_huffman_code_len) {
// set<string> huffman_look_up_table;
// for (auto code : huffman_code_set) {
// while (code.length() < max_huffman_code_len) {
// code = "0" + code;
// }
// huffman_look_up_table.insert(code);
// }
// return huffman_look_up_table;
//}
// Interprets *s as a binary numeral, left to right: each character shifts the
// accumulator left by one, and a '1' sets the new low bit (any other character
// counts as 0). The result is kept in 16 bits, so only the last 16 characters
// of a longer string contribute. s must not be null.
uint16_t string2bin(string* s)
{
    uint16_t value = 0;
    for (char ch : *s) {
        value = static_cast<uint16_t>((value << 1) | (ch == '1' ? 1 : 0));
    }
    return value;
}
// Converts every '0'/'1' code string in huffman_code_set to its numeric value
// via string2bin and returns the set of those values.
// Note: the code length is not kept, so codes that differ only in leading
// zeros (e.g. "0" and "00") map to the same entry.
set<uint16_t> create_huffman_lookup_table(set<string> huffman_code_set)
{
    set<uint16_t> lookup;
    for (set<string>::const_iterator it = huffman_code_set.begin(); it != huffman_code_set.end(); ++it) {
        // string2bin takes a mutable pointer, so work on a copy of the (const) set element.
        string bits = *it;
        lookup.insert(string2bin(&bits));
    }
    return lookup;
}
// Returns true when num is an element of set, false otherwise.
bool is_in_set(const std::set<uint16_t>& set, uint16_t num)
{
    return set.count(num) != 0;
}
//
//bool is_jpg_sector(unsigned char* buffer, unsigned int offset, set<uint16_t> huffman_look_up_table)
//{
// int warning_level = 1;//判断是不是属于jpg的块,大于10则退出
// for (unsigned int offset_test = 0; offset_test < offset; ++offset_test)
// {
// if (is_in_set(huffman_look_up_table, read2Bytes(buffer, offset_test)) && warning_level != 1)
// warning_level--;
// else
// warning_level *= 2;
// if (warning_level > 10)
// return false;
// }
// return true;
//}
// Walks the sectors [sector_begin, sector_end) looking for an EOI marker.
// Whenever a sector contains the marker, the sector is handed to
// is_jpg_sector together with huffman_code_set; the first sector for which
// that check returns 1 is reported on stdout and its number is returned.
//
// Returns -1 when no such sector exists or when the scratch buffer could not
// be allocated. The sector number is returned as int, so values above INT_MAX
// cannot be represented. A marker whose two bytes are split across two
// sectors is not detected.
int get_JPG_end(FILE* fp, set<string> huffman_code_set, unsigned int sector_begin, unsigned int sector_end)
{
    // Sector-sized scratch buffer, allocated on the first call and reused afterwards.
    static unsigned char* temp = (unsigned char*)malloc(SECTOR_SIZE * sizeof(unsigned char));
    if (temp == nullptr)
        return -1; // allocation failed: nothing can be scanned

    for (unsigned int i = sector_begin; i < sector_end; i++)
    {
        ReadSector(fp, i, temp);
        for (unsigned int offset = 0; offset < SECTOR_SIZE - 1; ++offset) {
            if (read2Bytes(temp, offset) != EOI)
                continue;
            // NOTE(review): the check receives the whole sector, not the marker
            // position, so it is repeated with the same arguments for every
            // marker found in this sector - confirm whether that is intended.
            if (is_jpg_sector(temp, huffman_code_set) == 1)
            {
                printf_s("end at sector: %u\n", i);
                return static_cast<int>(i);
            }
        }
    }
    return -1;
}
// Placeholder: performs no search and always returns 0.
// None of the parameters are used.
int jump_to_sos(FILE* fp, unsigned int curSector, unsigned int SectorNum)
{
    (void)fp;
    (void)curSector;
    (void)SectorNum;
    return 0;
}
// Scans fp sector by sector and reports, on stdout, every sector that starts
// with a JFIF or EXIF header. For a JFIF header it additionally decodes the
// Huffman table that follows and searches for the sector holding the end of
// the image.
//
// fp          - input opened for reading; its size is taken with fseek/ftell,
//               so it is limited to what a long can represent.
// buffer      - caller-provided scratch space that ReadSector fills for each sector.
// output_path - currently unused: nothing is written out yet.
void rebuild_JPG(FILE* fp, unsigned char* buffer, const char* output_path)
{
    (void)output_path;

    // Determine the number of whole sectors; bail out if the size is unknown,
    // because a failed ftell (-1) would otherwise turn into a huge sector count.
    if (fseek(fp, 0, SEEK_END) != 0) {
        printf_s("cannot determine input size\n");
        return;
    }
    const long file_size = ftell(fp);
    if (file_size < 0) {
        printf_s("cannot determine input size\n");
        return;
    }
    fseek(fp, 0, SEEK_SET);
    const unsigned int SectorNum = static_cast<unsigned int>(file_size / SECTOR_SIZE);

    for (unsigned int curSector = 0; curSector < SectorNum; curSector++) {
        ReadSector(fp, curSector, buffer);
        if (JFIF_head_found(buffer)) {
            printf_s("JFIF begin at sector: %u\n", curSector);
            unsigned int max_huffman_code_len = 0;
            const set<string> huffman_code_set = get_huffmanCode_set(fp, curSector, &max_huffman_code_len);
            // Meant to skip ahead to the compressed data so that an EOI byte
            // pair inside the header is not mistaken for the end of the image;
            // jump_to_sos is still a stub, so its result is not used.
            (void)jump_to_sos(fp, curSector, SectorNum);
            const int sector_end = get_JPG_end(fp, huffman_code_set, curSector, SectorNum);
            if (sector_end == -1)
                printf_s("未找到文件尾\n");
        }
        else if (EXIF_head_found(buffer)) {
            printf_s("EXIF begin at sector:%u\n", curSector);
        }
    }
}