Skip to content

Commit 78df7cd

Browse files
author
ironholds
committed
Fix #20
1 parent 5438111 commit 78df7cd

File tree

10 files changed

+192
-6
lines changed

10 files changed

+192
-6
lines changed

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ export(port)
1313
export(query)
1414
export(scheme)
1515
export(suffix_extract)
16+
export(url_compose)
1617
export(url_decode)
1718
export(url_encode)
1819
export(url_parameters)

NEWS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ NEW FEATURES
55
* Jay Jacobs' "tldextract" functionality has been merged with urltools, and can be accessed
66
with "suffix_extract"
77

8+
* At Nicolas Coutin's suggestion, url_compose - url_parse in reverse - has been introduced.
9+
810
BUG FIXES
911

1012
* A bug in which fragments could not be retrieved (and were incorrectly identified as parameters)

R/RcppExports.R

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -113,11 +113,35 @@ url_parameters <- function(urls, parameter_names) {
113113
#'@examples
114114
#'url_parse("https://en.wikipedia.org/wiki/Article")
115115
#'
116-
#'@seealso \code{url_parameters} for extracting values associated with particular keys in a URL's
117-
#'query string.
116+
#'@seealso \code{\link{url_parameters}} for extracting values associated with particular keys in a URL's
117+
#'query string, and \code{\link{url_compose}}, which is \code{url_parse} in reverse.
118118
#'
119119
#'@export
120120
url_parse <- function(urls) {
121121
# Thin R-side shim: forwards `urls` unchanged to the compiled routine
# 'urltools_url_parse' registered under the 'urltools' package.
.Call('urltools_url_parse', PACKAGE = 'urltools', urls)
122122
}
123123

124+
#'@title Recompose Parsed URLs
125+
#'
126+
#'@description Sometimes you want to take a vector of URLs, parse them, perform
127+
#'some operations and then rebuild them. \code{url_compose} takes a data.frame produced
128+
#'by \code{\link{url_parse}} and rebuilds it into a vector of full URLs (or: URLs as full
129+
#'as the vector initially thrown into url_parse).
130+
#'
131+
#'This is currently a `beta` feature; please do report bugs if you find them.
132+
#'
133+
#'@param parsed_urls a data.frame sourced from \code{\link{url_parse}}
134+
#'
135+
#'@seealso \code{\link{scheme}} and other accessors, which you may want to
136+
#'run URLs through before composing them to modify individual values.
137+
#'
138+
#'@examples
139+
#'#Parse a URL and compose it
140+
#'url <- "http://en.wikipedia.org"
141+
#'url_compose(url_parse(url))
142+
#'
143+
#'@export
144+
url_compose <- function(parsed_urls) {
145+
# Thin R-side shim: forwards `parsed_urls` unchanged to the compiled routine
# 'urltools_url_compose' registered under the 'urltools' package.
.Call('urltools_url_compose', PACKAGE = 'urltools', parsed_urls)
146+
}
147+

man/url_compose.Rd

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
% Generated by roxygen2 (4.1.1): do not edit by hand
2+
% Please edit documentation in R/RcppExports.R
3+
\name{url_compose}
4+
\alias{url_compose}
5+
\title{Recompose Parsed URLs}
6+
\usage{
7+
url_compose(parsed_urls)
8+
}
9+
\arguments{
10+
\item{parsed_urls}{a data.frame sourced from \code{\link{url_parse}}}
11+
}
12+
\description{
13+
Sometimes you want to take a vector of URLs, parse them, perform
14+
some operations and then rebuild them. \code{url_compose} takes a data.frame produced
15+
by \code{\link{url_parse}} and rebuilds it into a vector of full URLs (or: URLs as full
16+
as the vector initially thrown into url_parse).
17+
18+
This is currently a `beta` feature; please do report bugs if you find them.
19+
}
20+
\examples{
21+
#Parse a URL and compose it
22+
url <- "http://en.wikipedia.org"
23+
url_compose(url_parse(url))
24+
}
25+
\seealso{
26+
\code{\link{scheme}} and other accessors, which you may want to
27+
run URLs through before composing them to modify individual values.
28+
}
29+

man/url_parse.Rd

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ HTTP requests from R), but not for large-scale analysis.
3030
url_parse("https://en.wikipedia.org/wiki/Article")
3131
}
3232
\seealso{
33-
\code{url_parameters} for extracting values associated with particular keys in a URL's
34-
query string.
33+
\code{\link{url_parameters}} for extracting values associated with particular keys in a URL's
34+
query string, and \code{\link{url_compose}}, which is \code{url_parse} in reverse.
3535
}
3636

src/RcppExports.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,3 +75,14 @@ BEGIN_RCPP
7575
return __result;
7676
END_RCPP
7777
}
78+
// url_compose
79+
// Forward declaration of the C++ implementation invoked by the wrapper below.
std::vector < std::string > url_compose(DataFrame parsed_urls);
80+
// Entry point with C linkage for R's .Call interface: receives the argument as a
// SEXP, converts it to a DataFrame, runs url_compose and returns the wrapped result.
RcppExport SEXP urltools_url_compose(SEXP parsed_urlsSEXP) {
81+
BEGIN_RCPP
82+
Rcpp::RObject __result;
83+
Rcpp::RNGScope __rngScope;
84+
// Converts the incoming SEXP into the DataFrame type url_compose expects.
Rcpp::traits::input_parameter< DataFrame >::type parsed_urls(parsed_urlsSEXP);
85+
// Rcpp::wrap turns the std::vector<std::string> result back into an R object.
__result = Rcpp::wrap(url_compose(parsed_urls));
86+
return __result;
87+
END_RCPP
88+
}

src/compose.cpp

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
#include "compose.h"
2+
3+
// Reports whether a URL component contains any characters.
//
// Note the polarity: despite the name, the result is true for a NON-empty
// element and false for an empty one, so callers can read it as
// "is there something here worth emitting?".
bool compose::emptycheck(std::string element){
  return !element.empty();
}
9+
10+
std::string compose::compose_single(std::string scheme, std::string domain, std::string port, std::string path,
11+
std::string parameter, std::string fragment){
12+
13+
std::string output;
14+
15+
if(emptycheck(scheme)){
16+
output += scheme + "://";
17+
}
18+
19+
if(emptycheck(domain)){
20+
output += domain;
21+
22+
if(emptycheck(port)){
23+
output += ":" + port;
24+
} else {
25+
output += "/";
26+
}
27+
}
28+
29+
if(emptycheck(path)){
30+
output += path;
31+
}
32+
33+
if(emptycheck(parameter)){
34+
output += "?";
35+
output += parameter;
36+
}
37+
38+
if(emptycheck(fragment)){
39+
output += "#";
40+
output += fragment;
41+
}
42+
43+
return output;
44+
}
45+
46+
// Rebuilds a vector of URLs from a data.frame of parsed components.
//
// parsed_urls must provide the columns "scheme", "domain", "port", "path",
// "parameter" and "fragment"; row i of those columns is passed to
// compose_single to produce element i of the result. The number of URLs
// produced is taken from the "scheme" column.
//
// The loop index is a std::size_t so it has the same width as the value
// returned by size(); a 32-bit index would wrap around and never terminate
// on inputs with 2^32 or more rows.
std::vector < std::string > compose::compose_multiple(DataFrame parsed_urls){

  std::vector < std::string > schemes = parsed_urls["scheme"];
  std::vector < std::string > domains = parsed_urls["domain"];
  std::vector < std::string > ports = parsed_urls["port"];
  std::vector < std::string > paths = parsed_urls["path"];
  std::vector < std::string > parameters = parsed_urls["parameter"];
  std::vector < std::string > fragments = parsed_urls["fragment"];

  const std::size_t url_count = schemes.size();
  std::vector < std::string > output(url_count);

  for(std::size_t i = 0; i < url_count; ++i){
    // Give the user a chance to interrupt long-running jobs without paying
    // for the check on every single row.
    if((i % 10000) == 0){
      Rcpp::checkUserInterrupt();
    }
    output[i] = compose_single(schemes[i], domains[i], ports[i], paths[i], parameters[i],
                               fragments[i]);
  }

  return output;
}

src/compose.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
// Include guard. The name avoids double underscores, which are reserved for
// the implementation, and the guard wraps the whole header, includes included.
#ifndef URLTOOLS_COMPOSE_H
#define URLTOOLS_COMPOSE_H

#include <Rcpp.h>
// Kept at header scope for backward compatibility: compose.cpp includes only
// this header and refers to DataFrame without the Rcpp:: qualifier.
using namespace Rcpp;

// Reassembles URLs from the component columns of a parsed-URL data.frame.
class compose {

private:

  // Returns true when `element` is NOT empty (i.e. the component is present).
  bool emptycheck(std::string element);

  // Builds a single URL string from its components; empty components are
  // omitted together with their delimiters.
  std::string compose_single(std::string scheme, std::string domain, std::string port, std::string path,
                             std::string parameter, std::string fragment);

public:

  // Builds one URL per row of `parsed_urls`, reading the "scheme", "domain",
  // "port", "path", "parameter" and "fragment" columns.
  std::vector < std::string > compose_multiple(DataFrame parsed_urls);
};

#endif

src/urltools.cpp

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include <Rcpp.h>
22
#include "encoding.h"
33
#include "parsing.h"
4+
#include "compose.h"
45

56
using namespace Rcpp;
67

@@ -173,13 +174,39 @@ List url_parameters(std::vector < std::string > urls, std::vector < std::string
173174
//'@examples
174175
//'url_parse("https://en.wikipedia.org/wiki/Article")
175176
//'
176-
//'@seealso \code{url_parameters} for extracting values associated with particular keys in a URL's
177-
//'query string.
177+
//'@seealso \code{\link{url_parameters}} for extracting values associated with particular keys in a URL's
178+
//'query string, and \code{\link{url_compose}}, which is \code{url_parse} in reverse.
178179
//'
179180
//'@export
180181
//[[Rcpp::export]]
181182
DataFrame url_parse(std::vector < std::string > urls){
  // The parser receives the caller's vector directly; no intermediate
  // reference alias is needed to pass an lvalue.
  parsing url_parser;
  return url_parser.parse_to_df(urls);
}
187+
188+
//'@title Recompose Parsed URLs
189+
//'
190+
//'@description Sometimes you want to take a vector of URLs, parse them, perform
191+
//'some operations and then rebuild them. \code{url_compose} takes a data.frame produced
192+
//'by \code{\link{url_parse}} and rebuilds it into a vector of full URLs (or: URLs as full
193+
//'as the vector initially thrown into url_parse).
194+
//'
195+
//'This is currently a `beta` feature; please do report bugs if you find them.
196+
//'
197+
//'@param parsed_urls a data.frame sourced from \code{\link{url_parse}}
198+
//'
199+
//'@seealso \code{\link{scheme}} and other accessors, which you may want to
200+
//'run URLs through before composing them to modify individual values.
201+
//'
202+
//'@examples
203+
//'#Parse a URL and compose it
204+
//'url <- "http://en.wikipedia.org"
205+
//'url_compose(url_parse(url))
206+
//'
207+
//'@export
208+
//[[Rcpp::export]]
209+
std::vector < std::string > url_compose(DataFrame parsed_urls){
  // Exported shim only: the actual reassembly is done by the compose class.
  return compose().compose_multiple(parsed_urls);
}

tests/testthat/test_parsing.R

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,3 +46,8 @@ test_that("Parameter parsing works where the parameter appears earlier in the UR
4646
expect_that(url$to[1], equals("05/04/2015"))
4747
})
4848

49+
# Round-trip check: parsing a URL and recomposing the result should give back
# exactly the string that was parsed.
test_that("Composing works",{
50+
url <- "http://foo.bar.baz/qux/"
51+
amended_url <- url_compose(url_parse(url))
52+
expect_that(url, equals(amended_url))
53+
})

0 commit comments

Comments
 (0)