# Split the labelled excerpts held in ``df`` into one CSV per category under
# ../data/. When a category file already exists, the new rows are merged into
# it and rows with a duplicate "excerpt" are dropped before writing it back.
df.sort_values(by=['URL', 'category'], inplace=True)

# Columns persisted in every per-category file, in output order.
EXCERPT_COLUMNS = ['URL', 'contributor', 'excerpt']

# Category labels; each one maps to ../data/<label>.csv.
CATEGORIES = ('none', 'description', 'installation', 'invocation', 'citation')

for category in CATEGORIES:
    csv_path = '../data/' + category + '.csv'
    new_rows = df.loc[df['category'] == category, EXCERPT_COLUMNS]

    if os.path.exists(csv_path):
        # Read back from the very path that was just checked. The citation
        # branch used to read a hard-coded, user-specific absolute path
        # instead, so it broke (or merged the wrong file) on other machines.
        existing = pd.read_csv(csv_path)
        # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
        # Files written before the 'contributor' column existed simply get
        # NaN in that column for their old rows.
        merged = pd.concat([existing, new_rows])
        merged.drop_duplicates(subset="excerpt").to_csv(path_or_buf=csv_path, index=False)
    else:
        # First run for this category: rows are written as-is (no
        # de-duplication), matching the behaviour of the original script.
        new_rows.to_csv(path_or_buf=csv_path, index=False)
0 commit comments