Rupert Young
2017-06-28 20:40:06 UTC
Hello,
How can I retrieve the contents of an HTTP page that first requires a
form POST?
I want to retrieve the data from a web mailing list archive to convert
emails to a forum. When accessing through the browser I first submit a
"not spammer" form button and then the xhtml data (with email) appears.
I am trying to recreate this through Java but get different data for the
response, or second request. What am I doing wrong? Should the initial
post response retrieve the data, or should I have to do the second call?
Here's the code I am trying:
/**
 * Command-line demo that submits a form via HTTP POST to a URL and then
 * fetches the same URL via HTTP GET, saving the GET response body to a file.
 */
public class App {

    public App() {
    }

    /**
     * Posts the form fields {@code list} and {@code not_a_sniffer} to
     * {@code url}, prints the status line of that response, then issues a GET
     * for the same URL and writes the response body to {@code 273b.xml}.
     *
     * @param url           address used for both the POST and the GET
     * @param list          value sent as the {@code list} form field
     * @param not_a_sniffer value sent as the {@code not_a_sniffer} form field
     * @throws IOException if either request fails, the GET response carries no
     *                     body, or the output file cannot be written
     */
    public void sendReq(String url, String list, String not_a_sniffer)
            throws IOException {
        // One client instance serves both requests and is closed when done.
        // NOTE(review): presumably the shared client is what carries any
        // session state from the POST over to the GET - confirm.
        try (CloseableHttpClient httpclient = HttpClients.createDefault()) {
            HttpPost httpost = new HttpPost(url);
            List<NameValuePair> nvps = new ArrayList<>();
            nvps.add(new BasicNameValuePair("list", list));
            nvps.add(new BasicNameValuePair("not_a_sniffer", not_a_sniffer));
            httpost.setEntity(new UrlEncodedFormEntity(nvps, HTTP.UTF_8));

            HttpResponse response = httpclient.execute(httpost);
            HttpEntity entity = response.getEntity();
            try {
                System.out.println("Login form get: " + response.getStatusLine());
            } finally {
                // Drain the POST body so the connection can be reused.
                if (entity != null) {
                    EntityUtils.consume(entity);
                }
            }

            HttpGet httpget = new HttpGet(url);
            HttpResponse getresponse = httpclient.execute(httpget);
            HttpEntity getentity = getresponse.getEntity();
            // Check for a missing body BEFORE dereferencing the entity.
            if (getentity == null) {
                throw new IOException("GET " + url + " returned no body: "
                        + getresponse.getStatusLine());
            }
            try {
                saveInputStream(getentity.getContent(), "273b.xml");
            } finally {
                EntityUtils.consume(getentity);
            }
        }
    }

    /**
     * Copies every byte of {@code is} into the file named {@code sfile} and
     * closes the stream. Copying raw bytes keeps line terminators and the
     * original character encoding intact.
     *
     * @param is    stream to read; closed before this method returns
     * @param sfile path of the file to create or overwrite
     * @throws IOException if reading the stream or writing the file fails
     */
    private void saveInputStream(InputStream is, String sfile) throws
            IOException {
        File file = new File(sfile);
        try (InputStream in = is;
                FileOutputStream fout = new FileOutputStream(file)) {
            byte[] buffer = new byte[8192];
            int count;
            while ((count = in.read(buffer)) != -1) {
                fout.write(buffer, 0, count);
            }
        }
    }

    /**
     * @param args the command line arguments
     */
    public static void main(String[] args) {
        try {
            String html =
                    "https://lists.illinois.edu/lists/arc/csgnet/2017-06/msg00273.html";
            App app = new App();
            app.sendReq(html, "csgnet", "1");
            System.out.println("done");
        } catch (IOException | RuntimeException ex) {
            // Every failure is reported the same way, so one handler suffices.
            Logger.getLogger(App.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
}
Any help appreciated.
How can I retrieve the contents of an HTTP page that first requires a
form POST?
I want to retrieve the data from a web mailing list archive to convert
emails to a forum. When accessing through the browser I first submit a
"not spammer" form button and then the xhtml data (with email) appears.
I am trying to recreate this through Java but get different data for the
response, or second request. What am I doing wrong? Should the initial
post response retrieve the data, or should I have to do the second call?
Here's the code I am trying:
/**
 * Command-line demo that submits a form via HTTP POST to a URL and then
 * fetches the same URL via HTTP GET, saving the GET response body to a file.
 */
public class App {

    public App() {
    }

    /**
     * Posts the form fields {@code list} and {@code not_a_sniffer} to
     * {@code url}, prints the status line of that response, then issues a GET
     * for the same URL and writes the response body to {@code 273b.xml}.
     *
     * @param url           address used for both the POST and the GET
     * @param list          value sent as the {@code list} form field
     * @param not_a_sniffer value sent as the {@code not_a_sniffer} form field
     * @throws IOException if either request fails, the GET response carries no
     *                     body, or the output file cannot be written
     */
    public void sendReq(String url, String list, String not_a_sniffer)
            throws IOException {
        // One client instance serves both requests and is closed when done.
        // NOTE(review): presumably the shared client is what carries any
        // session state from the POST over to the GET - confirm.
        try (CloseableHttpClient httpclient = HttpClients.createDefault()) {
            HttpPost httpost = new HttpPost(url);
            List<NameValuePair> nvps = new ArrayList<>();
            nvps.add(new BasicNameValuePair("list", list));
            nvps.add(new BasicNameValuePair("not_a_sniffer", not_a_sniffer));
            httpost.setEntity(new UrlEncodedFormEntity(nvps, HTTP.UTF_8));

            HttpResponse response = httpclient.execute(httpost);
            HttpEntity entity = response.getEntity();
            try {
                System.out.println("Login form get: " + response.getStatusLine());
            } finally {
                // Drain the POST body so the connection can be reused.
                if (entity != null) {
                    EntityUtils.consume(entity);
                }
            }

            HttpGet httpget = new HttpGet(url);
            HttpResponse getresponse = httpclient.execute(httpget);
            HttpEntity getentity = getresponse.getEntity();
            // Check for a missing body BEFORE dereferencing the entity.
            if (getentity == null) {
                throw new IOException("GET " + url + " returned no body: "
                        + getresponse.getStatusLine());
            }
            try {
                saveInputStream(getentity.getContent(), "273b.xml");
            } finally {
                EntityUtils.consume(getentity);
            }
        }
    }

    /**
     * Copies every byte of {@code is} into the file named {@code sfile} and
     * closes the stream. Copying raw bytes keeps line terminators and the
     * original character encoding intact.
     *
     * @param is    stream to read; closed before this method returns
     * @param sfile path of the file to create or overwrite
     * @throws IOException if reading the stream or writing the file fails
     */
    private void saveInputStream(InputStream is, String sfile) throws
            IOException {
        File file = new File(sfile);
        try (InputStream in = is;
                FileOutputStream fout = new FileOutputStream(file)) {
            byte[] buffer = new byte[8192];
            int count;
            while ((count = in.read(buffer)) != -1) {
                fout.write(buffer, 0, count);
            }
        }
    }

    /**
     * @param args the command line arguments
     */
    public static void main(String[] args) {
        try {
            String html =
                    "https://lists.illinois.edu/lists/arc/csgnet/2017-06/msg00273.html";
            App app = new App();
            app.sendReq(html, "csgnet", "1");
            System.out.println("done");
        } catch (IOException | RuntimeException ex) {
            // Every failure is reported the same way, so one handler suffices.
            Logger.getLogger(App.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
}
Any help appreciated.
--
*Regards,
Rupert*
*
*
*Regards,
Rupert*
*
*