Fetching sul web ProvinciaCrawler completato

This commit is contained in:
Fabio Scotto di Santolo
2017-01-04 10:27:23 +01:00
parent d2cfcf4191
commit 28a6fb5c36
16 changed files with 297 additions and 106 deletions

View File

@@ -1,8 +1,106 @@
eclipse.preferences.version=1 eclipse.preferences.version=1
org.eclipse.jdt.core.compiler.annotation.inheritNullAnnotations=disabled
org.eclipse.jdt.core.compiler.annotation.missingNonNullByDefaultAnnotation=ignore
org.eclipse.jdt.core.compiler.annotation.nonnull=org.eclipse.jdt.annotation.NonNull
org.eclipse.jdt.core.compiler.annotation.nonnull.secondary=
org.eclipse.jdt.core.compiler.annotation.nonnullbydefault=org.eclipse.jdt.annotation.NonNullByDefault
org.eclipse.jdt.core.compiler.annotation.nonnullbydefault.secondary=
org.eclipse.jdt.core.compiler.annotation.nullable=org.eclipse.jdt.annotation.Nullable
org.eclipse.jdt.core.compiler.annotation.nullable.secondary=
org.eclipse.jdt.core.compiler.annotation.nullanalysis=disabled
org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
org.eclipse.jdt.core.compiler.codegen.methodParameters=do not generate
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8 org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8
org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
org.eclipse.jdt.core.compiler.compliance=1.8 org.eclipse.jdt.core.compiler.compliance=1.8
org.eclipse.jdt.core.compiler.debug.lineNumber=generate
org.eclipse.jdt.core.compiler.debug.localVariable=generate
org.eclipse.jdt.core.compiler.debug.sourceFile=generate
org.eclipse.jdt.core.compiler.problem.annotationSuperInterface=warning
org.eclipse.jdt.core.compiler.problem.assertIdentifier=error org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
org.eclipse.jdt.core.compiler.problem.autoboxing=ignore
org.eclipse.jdt.core.compiler.problem.comparingIdentical=warning
org.eclipse.jdt.core.compiler.problem.deadCode=warning
org.eclipse.jdt.core.compiler.problem.deprecation=warning
org.eclipse.jdt.core.compiler.problem.deprecationInDeprecatedCode=disabled
org.eclipse.jdt.core.compiler.problem.deprecationWhenOverridingDeprecatedMethod=disabled
org.eclipse.jdt.core.compiler.problem.discouragedReference=warning
org.eclipse.jdt.core.compiler.problem.emptyStatement=ignore
org.eclipse.jdt.core.compiler.problem.enumIdentifier=error org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
org.eclipse.jdt.core.compiler.problem.explicitlyClosedAutoCloseable=ignore
org.eclipse.jdt.core.compiler.problem.fallthroughCase=ignore
org.eclipse.jdt.core.compiler.problem.fatalOptionalError=disabled
org.eclipse.jdt.core.compiler.problem.fieldHiding=ignore
org.eclipse.jdt.core.compiler.problem.finalParameterBound=warning
org.eclipse.jdt.core.compiler.problem.finallyBlockNotCompletingNormally=warning
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
org.eclipse.jdt.core.compiler.problem.hiddenCatchBlock=warning
org.eclipse.jdt.core.compiler.problem.includeNullInfoFromAsserts=disabled
org.eclipse.jdt.core.compiler.problem.incompatibleNonInheritedInterfaceMethod=warning
org.eclipse.jdt.core.compiler.problem.incompleteEnumSwitch=warning
org.eclipse.jdt.core.compiler.problem.indirectStaticAccess=ignore
org.eclipse.jdt.core.compiler.problem.localVariableHiding=ignore
org.eclipse.jdt.core.compiler.problem.methodWithConstructorName=warning
org.eclipse.jdt.core.compiler.problem.missingDefaultCase=ignore
org.eclipse.jdt.core.compiler.problem.missingDeprecatedAnnotation=ignore
org.eclipse.jdt.core.compiler.problem.missingEnumCaseDespiteDefault=disabled
org.eclipse.jdt.core.compiler.problem.missingHashCodeMethod=ignore
org.eclipse.jdt.core.compiler.problem.missingOverrideAnnotation=ignore
org.eclipse.jdt.core.compiler.problem.missingOverrideAnnotationForInterfaceMethodImplementation=enabled
org.eclipse.jdt.core.compiler.problem.missingSerialVersion=warning
org.eclipse.jdt.core.compiler.problem.missingSynchronizedOnInheritedMethod=ignore
org.eclipse.jdt.core.compiler.problem.noEffectAssignment=warning
org.eclipse.jdt.core.compiler.problem.noImplicitStringConversion=warning
org.eclipse.jdt.core.compiler.problem.nonExternalizedStringLiteral=ignore
org.eclipse.jdt.core.compiler.problem.nonnullParameterAnnotationDropped=warning
org.eclipse.jdt.core.compiler.problem.nonnullTypeVariableFromLegacyInvocation=warning
org.eclipse.jdt.core.compiler.problem.nullAnnotationInferenceConflict=error
org.eclipse.jdt.core.compiler.problem.nullReference=warning
org.eclipse.jdt.core.compiler.problem.nullSpecViolation=error
org.eclipse.jdt.core.compiler.problem.nullUncheckedConversion=warning
org.eclipse.jdt.core.compiler.problem.overridingPackageDefaultMethod=warning
org.eclipse.jdt.core.compiler.problem.parameterAssignment=ignore
org.eclipse.jdt.core.compiler.problem.pessimisticNullAnalysisForFreeTypeVariables=warning
org.eclipse.jdt.core.compiler.problem.possibleAccidentalBooleanAssignment=ignore
org.eclipse.jdt.core.compiler.problem.potentialNullReference=ignore
org.eclipse.jdt.core.compiler.problem.potentiallyUnclosedCloseable=ignore
org.eclipse.jdt.core.compiler.problem.rawTypeReference=warning
org.eclipse.jdt.core.compiler.problem.redundantNullAnnotation=warning
org.eclipse.jdt.core.compiler.problem.redundantNullCheck=ignore
org.eclipse.jdt.core.compiler.problem.redundantSpecificationOfTypeArguments=ignore
org.eclipse.jdt.core.compiler.problem.redundantSuperinterface=ignore
org.eclipse.jdt.core.compiler.problem.reportMethodCanBePotentiallyStatic=ignore
org.eclipse.jdt.core.compiler.problem.reportMethodCanBeStatic=ignore
org.eclipse.jdt.core.compiler.problem.specialParameterHidingField=disabled
org.eclipse.jdt.core.compiler.problem.staticAccessReceiver=warning
org.eclipse.jdt.core.compiler.problem.suppressOptionalErrors=disabled
org.eclipse.jdt.core.compiler.problem.suppressWarnings=enabled
org.eclipse.jdt.core.compiler.problem.syntacticNullAnalysisForFields=disabled
org.eclipse.jdt.core.compiler.problem.syntheticAccessEmulation=ignore
org.eclipse.jdt.core.compiler.problem.typeParameterHiding=warning
org.eclipse.jdt.core.compiler.problem.unavoidableGenericTypeProblems=enabled
org.eclipse.jdt.core.compiler.problem.uncheckedTypeOperation=ignore
org.eclipse.jdt.core.compiler.problem.unclosedCloseable=warning
org.eclipse.jdt.core.compiler.problem.undocumentedEmptyBlock=ignore
org.eclipse.jdt.core.compiler.problem.unhandledWarningToken=warning
org.eclipse.jdt.core.compiler.problem.unnecessaryElse=ignore
org.eclipse.jdt.core.compiler.problem.unnecessaryTypeCheck=ignore
org.eclipse.jdt.core.compiler.problem.unqualifiedFieldAccess=ignore
org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownException=ignore
org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionExemptExceptionAndThrowable=enabled
org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionIncludeDocCommentReference=enabled
org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionWhenOverriding=disabled
org.eclipse.jdt.core.compiler.problem.unusedExceptionParameter=ignore
org.eclipse.jdt.core.compiler.problem.unusedImport=warning
org.eclipse.jdt.core.compiler.problem.unusedLabel=warning
org.eclipse.jdt.core.compiler.problem.unusedLocal=warning
org.eclipse.jdt.core.compiler.problem.unusedObjectAllocation=ignore
org.eclipse.jdt.core.compiler.problem.unusedParameter=ignore
org.eclipse.jdt.core.compiler.problem.unusedParameterIncludeDocCommentReference=enabled
org.eclipse.jdt.core.compiler.problem.unusedParameterWhenImplementingAbstract=disabled
org.eclipse.jdt.core.compiler.problem.unusedParameterWhenOverridingConcrete=disabled
org.eclipse.jdt.core.compiler.problem.unusedPrivateMember=warning
org.eclipse.jdt.core.compiler.problem.unusedTypeParameter=ignore
org.eclipse.jdt.core.compiler.problem.unusedWarningToken=warning
org.eclipse.jdt.core.compiler.problem.varargsArgumentNeedCast=warning
org.eclipse.jdt.core.compiler.source=1.8 org.eclipse.jdt.core.compiler.source=1.8

View File

@@ -0,0 +1,2 @@
eclipse.preferences.version=1
org.eclipse.m2e.wtp.enabledProjectSpecificPrefs=false

View File

@@ -0,0 +1,2 @@
eclipse.preferences.version=1
org.eclipse.wst.ws.service.policy.projectEnabled=false

View File

@@ -30,5 +30,15 @@
<artifactId>jsoup</artifactId> <artifactId>jsoup</artifactId>
<version>1.10.1</version> <version>1.10.1</version>
</dependency> </dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.7.22</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-simple</artifactId>
<version>1.7.22</version>
</dependency>
</dependencies> </dependencies>
</project> </project>

View File

@@ -1,14 +0,0 @@
package it.noah.crawler.annotation;
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
/**
 * Field-level annotation that attaches a name string to a model field.
 *
 * <p>Retained at runtime so it can be read through reflection.
 * NOTE(review): the name presumably identifies the scraped table column the
 * field is filled from; confirm against the converter code.
 */
@Retention(value = RetentionPolicy.RUNTIME)
@Target({ ElementType.FIELD })
public @interface Column {

    /**
     * Name associated with the annotated field (mandatory, no default).
     *
     * @return the configured name
     */
    String name();
}

View File

@@ -1,12 +0,0 @@
package it.noah.crawler.annotation;
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
/**
 * Marker annotation for types; it declares no members.
 *
 * <p>Retained at runtime so it can be detected through reflection.
 * NOTE(review): presumably flags classes that act as crawler model objects;
 * confirm with the code that inspects it.
 */
@Retention(value = RetentionPolicy.RUNTIME)
@Target({ ElementType.TYPE })
public @interface NoahCrawlerModel {
}

View File

@@ -1,12 +0,0 @@
package it.noah.crawler.annotation;
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
/**
 * Marker annotation for fields; it declares no members.
 *
 * <p>Retained at runtime so it can be detected through reflection.
 * NOTE(review): the name looks like a misspelling of "Transient" and the
 * annotation presumably marks fields to be skipped during conversion;
 * confirm with the code that inspects it. The identifier is kept as-is
 * because it is part of the public API.
 */
@Retention(value = RetentionPolicy.RUNTIME)
@Target({ ElementType.FIELD })
public @interface Transiet {
}

View File

@@ -3,9 +3,10 @@ package it.noah.crawler.converter.impl;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import it.noah.crawler.annotation.Column;
import it.noah.crawler.converter.Converter; import it.noah.crawler.converter.Converter;
import it.noah.crawler.dom.tag.Cell;
import it.noah.crawler.dom.tag.Table; import it.noah.crawler.dom.tag.Table;
import it.noah.crawler.dom.tag.TableRow;
import it.noah.crawler.model.Provincia; import it.noah.crawler.model.Provincia;
public class ProvinciaConverter implements Converter<Provincia> { public class ProvinciaConverter implements Converter<Provincia> {
@@ -13,14 +14,30 @@ public class ProvinciaConverter implements Converter<Provincia> {
@Override @Override
public List<Provincia> convertTable(Table table) { public List<Provincia> convertTable(Table table) {
List<Provincia> province = new ArrayList<Provincia>(); List<Provincia> province = new ArrayList<Provincia>();
int j = 0; for (TableRow row : table.getRows()) {
for (int i = 0; i < table.columns(); i++) { Provincia provincia = getProvincia(row);
Provincia provincia = new Provincia();
Column annotations = provincia.getClass()
.getAnnotation(Column.class);
province.add(provincia); province.add(provincia);
} }
return province; return province;
} }
/**
 * Builds a {@link Provincia} from one table row, mapping cells by position:
 * cell 0 is the sigla, cell 1 the nome, cell 2 the regione. Cells beyond the
 * third are ignored; a row with fewer cells leaves the remaining
 * properties unset.
 *
 * @param row the table row to read the cells from
 * @return a new Provincia populated from the row's cells
 */
private Provincia getProvincia(TableRow row) {
    Provincia result = new Provincia();
    int position = 0;
    while (position < row.getRow().size()) {
        Cell current = row.getRow().get(position);
        if (position == 0) {
            result.setSigla(current.getValue());
        } else if (position == 1) {
            result.setNome(current.getValue());
        } else if (position == 2) {
            result.setRegione(current.getValue());
        }
        position++;
    }
    return result;
}
} }

View File

@@ -6,10 +6,6 @@ public class Table {
private TableHeader header; private TableHeader header;
private List<TableRow> rows; private List<TableRow> rows;
public Table() {
header = new TableHeader();
}
public void setHeader(TableHeader header) { public void setHeader(TableHeader header) {
this.header = header; this.header = header;
} }
@@ -27,7 +23,7 @@ public class Table {
} }
public int columns() { public int columns() {
return header.size(); return header != null ? header.size() : 0;
} }
} }

View File

@@ -1,25 +1,30 @@
package it.noah.crawler.dom.tag.builder; package it.noah.crawler.dom.tag.builder;
import it.noah.crawler.dom.tag.Table; import java.util.List;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
import org.jsoup.select.Elements; import org.jsoup.select.Elements;
import it.noah.crawler.dom.tag.Table;
import it.noah.crawler.dom.tag.TableHeader;
import it.noah.crawler.dom.tag.TableRow;
public class TableBuilder { public class TableBuilder {
public Table build(Element element, boolean headerFlag) { public Table build(Element element, boolean headerFlag) {
Table table = new Table(); Table table = new Table();
Elements elements = element.select("tr"); Elements rows = element.select("tr");
Element header; Element header;
if (headerFlag) { if (headerFlag) {
header = elements.select("tr").first(); header = rows.first();
header.remove(); rows.remove(header);
TableHeaderBuilder thb = new TableHeaderBuilder(); TableHeaderBuilder tableHeaderBuilder = new TableHeaderBuilder();
table.setHeader(thb.build(header)); TableHeader tableHeader = tableHeaderBuilder.build(header);
table.setHeader(tableHeader);
} }
Elements rows = elements.select("tr"); TableRowBuilder tableRowBuilder = new TableRowBuilder();
TableRowBuilder thr = new TableRowBuilder(); List<TableRow> tableRows = tableRowBuilder.build(rows);
// table.setBody(thr.build(rows)); TODO table.setRows(tableRows);
return table; return table;
} }

View File

@@ -1,12 +1,21 @@
package it.noah.crawler.dom.tag.builder; package it.noah.crawler.dom.tag.builder;
import it.noah.crawler.dom.tag.TableHeader;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import it.noah.crawler.dom.tag.Cell;
import it.noah.crawler.dom.tag.TableHeader;
public class TableHeaderBuilder { public class TableHeaderBuilder {
public TableHeader build(Element element) { public TableHeader build(Element element) {
return null; TableHeader tableHeader = new TableHeader();
Elements tableDatas = element.getElementsByTag("th");
for (Element tableData : tableDatas) {
Cell data = new Cell();
data.setValue(tableData.text());
tableHeader.addColumn(data);
}
return tableHeader;
} }
} }

View File

@@ -1,22 +1,31 @@
package it.noah.crawler.dom.tag.builder; package it.noah.crawler.dom.tag.builder;
import it.noah.crawler.dom.tag.Cell; import java.util.ArrayList;
import it.noah.crawler.dom.tag.TableRow; import java.util.List;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
import org.jsoup.select.Elements; import org.jsoup.select.Elements;
import it.noah.crawler.dom.tag.Cell;
import it.noah.crawler.dom.tag.TableRow;
public class TableRowBuilder { public class TableRowBuilder {
public TableRow build(Elements rows) { public List<TableRow> build(Elements htmlRows) {
List<TableRow> rows = new ArrayList<TableRow>();
for (Element htmlRow : htmlRows) {
TableRow tableRow = new TableRow(); TableRow tableRow = new TableRow();
for (Element row : rows) { List<Cell> row = new ArrayList<Cell>();
Elements tds = row.select("td"); Elements tds = htmlRow.select("td");
for (Element tableData : tds) {
Cell cell = new Cell(); Cell cell = new Cell();
cell.setValue(tds.text()); cell.setValue(tableData.text());
// tableRow.addColumn(cell); TODO row.add(cell);
} }
return tableRow; tableRow.addRow(row);
rows.add(tableRow);
}
return rows;
} }
} }

View File

@@ -1,5 +1,11 @@
package it.noah.crawler.impl; package it.noah.crawler.impl;
import java.io.IOException;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import it.noah.crawler.AbstractNoahCrawler; import it.noah.crawler.AbstractNoahCrawler;
import it.noah.crawler.NoahCrawler; import it.noah.crawler.NoahCrawler;
import it.noah.crawler.converter.impl.ProvinciaConverter; import it.noah.crawler.converter.impl.ProvinciaConverter;
@@ -9,11 +15,11 @@ import it.noah.crawler.exception.NoahCrawlerException;
import it.noah.crawler.exception.ObjectNotFoundException; import it.noah.crawler.exception.ObjectNotFoundException;
import it.noah.crawler.model.Provincia; import it.noah.crawler.model.Provincia;
import java.io.IOException; public class ProvinciaNoahCrawler extends AbstractNoahCrawler
import java.util.List; implements NoahCrawler {
public class ProvinciaNoahCrawler extends AbstractNoahCrawler implements private final static Logger log = LoggerFactory
NoahCrawler { .getLogger(ProvinciaNoahCrawler.class);
public ProvinciaNoahCrawler() throws IOException { public ProvinciaNoahCrawler() throws IOException {
super(UrlEnum.PROVINCE_URL.getUrl()); super(UrlEnum.PROVINCE_URL.getUrl());
@@ -22,10 +28,12 @@ public class ProvinciaNoahCrawler extends AbstractNoahCrawler implements
@Override @Override
public void run() throws NoahCrawlerException { public void run() throws NoahCrawlerException {
try { try {
List<Provincia> province = convertTable2Provincia(getTable("", true)); List<Provincia> province = convertTable2Provincia(
getTable("", true));
log.debug("Provincia convertita con successo!!!");
// TODO una volta creata la lista va salvata sul db // TODO una volta creata la lista va salvata sul db
} catch (ObjectNotFoundException e) { } catch (ObjectNotFoundException e) {
e.printStackTrace(); log.error(e.getMessage());
} }
} }

View File

@@ -1,25 +1,15 @@
package it.noah.crawler.model; package it.noah.crawler.model;
import it.noah.crawler.annotation.Column; import java.io.Serializable;
import it.noah.crawler.annotation.NoahCrawlerModel;
import it.noah.crawler.annotation.Transiet;
@NoahCrawlerModel public class Comune implements Serializable {
public class Comune {
private static final long serialVersionUID = -2171167117875954706L;
@Transiet
private Long id; private Long id;
@Column(name = "")
private String nome; private String nome;
@Column(name = "")
private String cap; private String cap;
@Column(name = "")
private Provincia provincia; private Provincia provincia;
@Column(name = "")
private String regione; private String regione;
public void setId(Long id) { public void setId(Long id) {

View File

@@ -1,27 +1,16 @@
package it.noah.crawler.model; package it.noah.crawler.model;
import it.noah.crawler.annotation.Column; import java.io.Serializable;
import it.noah.crawler.annotation.NoahCrawlerModel;
import it.noah.crawler.annotation.Transiet;
import java.util.List; import java.util.List;
@NoahCrawlerModel public class Provincia implements Serializable {
public class Provincia {
private static final long serialVersionUID = 106451135158559443L;
@Transiet
private Long id; private Long id;
@Column(name = "Provincia")
private String nome; private String nome;
@Column(name = "Sigla")
private String sigla; private String sigla;
@Column(name = "Regione")
private String regione; private String regione;
@Transiet
private List<Comune> comuni; private List<Comune> comuni;
public void setId(Long id) { public void setId(Long id) {

View File

@@ -0,0 +1,94 @@
package it.noah.crawler.servlet;
import java.io.IOException;
import java.io.PrintWriter;
import javax.servlet.ServletException;
import javax.servlet.annotation.WebServlet;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import it.noah.crawler.enums.CrawlerEnum;
import it.noah.crawler.exception.NoahCrawlerException;
import it.noah.crawler.factory.NoahCrawlerFactory;
/**
 * Servlet mapped to {@code /noahCrawling} that launches crawlers on demand.
 *
 * <p>The crawler to run is selected with the {@code job} request parameter
 * (matched case-insensitively):
 * <ul>
 * <li>missing parameter or {@code allCrawler}: every {@link CrawlerEnum}
 * value is run in declaration order;</li>
 * <li>the name of {@code CrawlerEnum.PROVINCIA_CRAWLER} or
 * {@code CrawlerEnum.COMUNE_CRAWLER}: only that crawler is run.</li>
 * </ul>
 * GET and POST are handled identically.
 */
@WebServlet("/noahCrawling")
public class NoahCrawlerServlet extends HttpServlet {

    private static final long serialVersionUID = 1L;

    private static final Logger log = LoggerFactory
            .getLogger(NoahCrawlerServlet.class);

    /** Value of the {@code job} parameter that explicitly requests every crawler. */
    private static final String ALL_CRAWLER_JOB = "allCrawler";

    /**
     * @see HttpServlet#HttpServlet()
     */
    public NoahCrawlerServlet() {
        super();
    }

    /**
     * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse
     *      response)
     */
    @Override
    protected void doGet(HttpServletRequest request,
            HttpServletResponse response) throws ServletException, IOException {
        handleRequest(request, response);
    }

    /**
     * @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse
     *      response)
     */
    @Override
    protected void doPost(HttpServletRequest request,
            HttpServletResponse response) throws ServletException, IOException {
        handleRequest(request, response);
    }

    /**
     * Runs the crawler(s) selected by the {@code job} parameter and writes the
     * outcome to the response body.
     *
     * <p>The success banner is written only when a job actually ran without
     * error. A crawler failure is answered with status 500 and an unknown
     * {@code job} value with status 400, so that a failed request is never
     * reported as successful.
     *
     * @param request  the incoming request carrying the optional {@code job}
     *                 parameter
     * @param response the response the outcome is written to
     * @throws IOException if the response writer cannot be obtained
     */
    private void handleRequest(HttpServletRequest request,
            HttpServletResponse response) throws IOException {
        String param = request.getParameter("job");
        PrintWriter out = response.getWriter();
        try {
            if (param == null) {
                log.warn("ATTENZIONE!!! Stanno partendo tutti i crawler");
                runAllCrawler();
            } else if (CrawlerEnum.PROVINCIA_CRAWLER.name()
                    .equalsIgnoreCase(param)) {
                runCrawler(CrawlerEnum.PROVINCIA_CRAWLER);
            } else if (CrawlerEnum.COMUNE_CRAWLER.name()
                    .equalsIgnoreCase(param)) {
                runCrawler(CrawlerEnum.COMUNE_CRAWLER);
            } else if (ALL_CRAWLER_JOB.equalsIgnoreCase(param)) {
                runAllCrawler();
            } else {
                // Nothing was executed: do not claim success. The raw
                // parameter is only logged, never echoed into the HTML body.
                log.warn("[ERRORE] Job non riconosciuto: {}", param);
                response.setStatus(HttpServletResponse.SC_BAD_REQUEST);
                out.println("[ERRORE] Job non riconosciuto");
                return;
            }
        } catch (NoahCrawlerException | IOException e) {
            // Pass the exception itself so the stack trace and cause are kept.
            log.error("[ERRORE] {}", e.getMessage(), e);
            response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
            out.println("[ERRORE] " + e.getMessage());
            return;
        }
        out.println("<h1>Job eseguito con successo!!!</h1>");
    }

    /**
     * Runs every crawler declared in {@link CrawlerEnum}, stopping at the
     * first one that throws.
     *
     * @throws NoahCrawlerException if a crawler fails
     * @throws IOException          if a crawler cannot be created or fetched
     */
    private void runAllCrawler() throws NoahCrawlerException, IOException {
        for (CrawlerEnum crawlerEnum : CrawlerEnum.values()) {
            runCrawler(crawlerEnum);
        }
    }

    /**
     * Obtains the crawler for the given enum value from
     * {@link NoahCrawlerFactory} and runs it.
     *
     * @param crawlerEnum identifies the crawler to run
     * @throws NoahCrawlerException if the crawler fails
     * @throws IOException          if the crawler cannot be created or fetched
     */
    private void runCrawler(CrawlerEnum crawlerEnum)
            throws NoahCrawlerException, IOException {
        NoahCrawlerFactory.getInstance(crawlerEnum).run();
    }
}