aggiunta classi vecchio progetto

This commit is contained in:
Fabio Scotto di Santolo
2017-01-03 13:05:49 +01:00
parent c70523e9e7
commit d2cfcf4191
26 changed files with 626 additions and 0 deletions

View File

@@ -0,0 +1,30 @@
package it.noah.crawler;
import it.noah.crawler.dom.NoahDOMExplorer;
import it.noah.crawler.dom.tag.Table;
import it.noah.crawler.exception.NoahCrawlerException;
import it.noah.crawler.exception.ObjectNotFoundException;
import java.io.IOException;
/**
 * Base class for crawlers that work on a single remote HTML page.
 *
 * <p>The page is fetched once, at construction time, through a
 * {@link NoahDOMExplorer}. A fetch failure does not abort construction; it is
 * remembered and reported the first time a table is requested.
 */
public abstract class AbstractNoahCrawler implements NoahCrawler {

    /** Explorer for the fetched page; {@code null} when the fetch failed. */
    private NoahDOMExplorer explorer;

    /** Failure raised while fetching the page, kept so it can be reported as a cause. */
    private IOException loadFailure;

    /**
     * Fetches the page at {@code url}.
     *
     * @param url address of the page to crawl
     */
    public AbstractNoahCrawler(String url) {
        try {
            this.explorer = new NoahDOMExplorer(url);
        } catch (IOException e) {
            // Construction stays best-effort, but the failure is no longer lost:
            // getTable() reports it instead of failing with a NullPointerException.
            this.loadFailure = e;
            e.printStackTrace();
        }
    }

    /**
     * Executes the crawl.
     *
     * @throws NoahCrawlerException if the crawl cannot be completed
     */
    @Override
    public abstract void run() throws NoahCrawlerException;

    /**
     * Looks up a table of the fetched page.
     *
     * @param tableName name used by the explorer to select the table
     * @param header    whether the table's first row is a header row
     * @return the table built by the explorer
     * @throws ObjectNotFoundException if the page could not be fetched at
     *         construction time, or if the explorer cannot provide the table
     */
    public Table getTable(String tableName, boolean header)
            throws ObjectNotFoundException {
        if (explorer == null) {
            ObjectNotFoundException missing = new ObjectNotFoundException(
                    "Documento non presente in sessione");
            if (loadFailure != null) {
                missing.initCause(loadFailure);
            }
            throw missing;
        }
        return explorer.getTable(tableName, header);
    }
}

View File

@@ -0,0 +1,9 @@
package it.noah.crawler;
import it.noah.crawler.exception.NoahCrawlerException;
/**
 * Contract shared by every crawler: a single operation that performs the crawl.
 */
public interface NoahCrawler {

    /**
     * Performs the crawl.
     *
     * @throws NoahCrawlerException if the crawl cannot be completed
     */
    void run() throws NoahCrawlerException;
}

View File

@@ -0,0 +1,14 @@
package it.noah.crawler.annotation;
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
/**
 * Field-level marker carrying a column name; retained at runtime so it can be
 * read through reflection.
 */
@Retention(RetentionPolicy.RUNTIME)
@Target({ElementType.FIELD})
public @interface Column {

    /** Column name associated with the annotated field. */
    String name();
}

View File

@@ -0,0 +1,12 @@
package it.noah.crawler.annotation;
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
/**
 * Type-level marker annotation, retained at runtime. It declares no elements.
 */
@Retention(RetentionPolicy.RUNTIME)
@Target({ElementType.TYPE})
public @interface NoahCrawlerModel {
}

View File

@@ -0,0 +1,12 @@
package it.noah.crawler.annotation;
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
/**
 * Field-level marker annotation, retained at runtime. It declares no elements.
 *
 * <p>NOTE(review): the name looks like a misspelling of "Transient"; it is kept
 * because the annotation name is part of the public API.
 */
@Retention(RetentionPolicy.RUNTIME)
@Target({ElementType.FIELD})
public @interface Transiet {
}

View File

@@ -0,0 +1,11 @@
package it.noah.crawler.converter;
import it.noah.crawler.dom.tag.Table;
import java.util.List;
/**
 * Converts a parsed {@link Table} into a list of model objects.
 *
 * @param <T> type of the model objects produced
 */
public interface Converter<T> {

    /**
     * Converts the given table.
     *
     * <p>The element type is the interface's own {@code T}. A method-level
     * type parameter of the same name would shadow it and turn every
     * implementation into an unchecked override.
     *
     * @param table table to convert
     * @return the model objects obtained from the table
     */
    List<T> convertTable(Table table);
}

View File

@@ -0,0 +1,18 @@
package it.noah.crawler.converter.impl;
import it.noah.crawler.converter.Converter;
import it.noah.crawler.dom.tag.Table;
import it.noah.crawler.model.Comune;
import java.util.ArrayList;
import java.util.List;
/**
 * {@link Converter} for {@link Comune} objects.
 *
 * <p>Currently a placeholder: the table is not inspected and the result is
 * always an empty, mutable list.
 */
public class ComuneConverter implements Converter<Comune> {

    /**
     * Returns a new empty list; {@code table} is ignored.
     *
     * @param table table to convert (unused)
     * @return a new empty {@link ArrayList}
     */
    @Override
    public List<Comune> convertTable(Table table) {
        return new ArrayList<Comune>();
    }
}

View File

@@ -0,0 +1,26 @@
package it.noah.crawler.converter.impl;
import java.util.ArrayList;
import java.util.List;
import it.noah.crawler.annotation.Column;
import it.noah.crawler.converter.Converter;
import it.noah.crawler.dom.tag.Table;
import it.noah.crawler.model.Provincia;
public class ProvinciaConverter implements Converter<Provincia> {
@Override
public List<Provincia> convertTable(Table table) {
List<Provincia> province = new ArrayList<Provincia>();
int j = 0;
for (int i = 0; i < table.columns(); i++) {
Provincia provincia = new Provincia();
Column annotations = provincia.getClass()
.getAnnotation(Column.class);
province.add(provincia);
}
return province;
}
}

View File

@@ -0,0 +1,51 @@
package it.noah.crawler.dom;
import it.noah.crawler.dom.tag.Table;
import it.noah.crawler.dom.tag.builder.TableBuilder;
import it.noah.crawler.exception.ObjectNotFoundException;
import java.io.IOException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Fetches an HTML page with jsoup and gives access to its tables.
 */
public class NoahDOMExplorer {

    /** Page fetched at construction time. */
    private final Document document;

    /**
     * Fetches the page at {@code url}.
     *
     * @param url address of the page
     * @throws IOException if the page cannot be fetched
     */
    public NoahDOMExplorer(String url) throws IOException {
        document = Jsoup.connect(url).get();
    }

    /** @return the fetched document */
    public synchronized Document getDocument() {
        return document;
    }

    /**
     * Builds the table whose {@code class} attribute equals {@code tableName}.
     *
     * @param tableName value of the {@code class} attribute of the wanted table
     * @param header    whether the first row of the table is a header row
     * @return the table built from the matching {@code <table>} element
     * @throws ObjectNotFoundException  if no document is available or no table
     *         matches {@code tableName}
     * @throws IllegalArgumentException if {@code tableName} is {@code null}
     */
    public synchronized Table getTable(String tableName, boolean header)
            throws ObjectNotFoundException {
        if (document == null) {
            throw new ObjectNotFoundException(
                    "Documento non presente in sessione");
        }
        Elements elements = document.getElementsByTag("table");
        Element element = selectByName(elements, tableName);
        if (element == null) {
            // Report the miss explicitly instead of handing null to the
            // builder, which would fail with a NullPointerException.
            throw new ObjectNotFoundException(
                    "Tabella non trovata: " + tableName);
        }
        TableBuilder tableBuilder = new TableBuilder();
        return tableBuilder.build(element, header);
    }

    /**
     * Returns the last element whose {@code class} attribute equals
     * {@code name}, or {@code null} when none matches.
     */
    private synchronized Element selectByName(Elements elements, String name) {
        if (name == null || elements == null) {
            throw new IllegalArgumentException("Parametri non validi!!!");
        }
        Element selected = null;
        for (Element current : elements) {
            if (name.equals(current.className())) {
                selected = current;
            }
        }
        return selected;
    }
}

View File

@@ -0,0 +1,14 @@
package it.noah.crawler.dom.tag;
/**
 * A single table cell holding a text value.
 */
public class Cell {

    /** Text of the cell; {@code null} until a value is set. */
    private String value;

    /** @return the current text of the cell */
    public String getValue() {
        return this.value;
    }

    /** @param value new text of the cell */
    public void setValue(String value) {
        this.value = value;
    }
}

View File

@@ -0,0 +1,33 @@
package it.noah.crawler.dom.tag;
import java.util.List;
/**
 * In-memory representation of an HTML table: one header plus a list of rows.
 */
public class Table {

    /** Header of the table; may be replaced, including with {@code null}. */
    private TableHeader header;

    /** Rows of the table; starts as an empty list rather than {@code null}. */
    private List<TableRow> rows;

    /** Creates a table with an empty header and no rows. */
    public Table() {
        header = new TableHeader();
        // Fully-qualified so the import list of this file does not need to change.
        rows = new java.util.ArrayList<TableRow>();
    }

    /** @param header new header of the table */
    public void setHeader(TableHeader header) {
        this.header = header;
    }

    /** @return the header of the table */
    public TableHeader getHeader() {
        return header;
    }

    /** @param rows new rows of the table */
    public void setRows(List<TableRow> rows) {
        this.rows = rows;
    }

    /** @return the rows of the table */
    public List<TableRow> getRows() {
        return rows;
    }

    /**
     * @return the number of header cells, or 0 when no header is set
     */
    public int columns() {
        // setHeader accepts null, so guard instead of failing with an NPE.
        return header == null ? 0 : header.size();
    }
}

View File

@@ -0,0 +1,25 @@
package it.noah.crawler.dom.tag;
import java.util.LinkedList;
import java.util.List;
/**
 * Header of a table: an ordered list of cells, one per column.
 */
public class TableHeader {

    /** Header cells in insertion order. */
    private List<Cell> cells = new LinkedList<Cell>();

    /** @return the number of header cells */
    public int size() {
        return this.cells.size();
    }

    /**
     * @param index zero-based position of the wanted cell
     * @return the header cell at {@code index}
     */
    public Cell getColumn(int index) {
        return this.cells.get(index);
    }

    /** @return the backing list of header cells (not a copy) */
    public List<Cell> getColumns() {
        return this.cells;
    }

    /** @param cell header cell to append */
    public void addColumn(Cell cell) {
        this.cells.add(cell);
    }
}

View File

@@ -0,0 +1,21 @@
package it.noah.crawler.dom.tag;
import java.util.LinkedList;
import java.util.List;
/**
 * A table row: an ordered list of cells.
 */
public class TableRow {

    /** Cells of the row; starts as an empty list. */
    private List<Cell> cells = new LinkedList<Cell>();

    /** @return the number of cells in the row */
    public int size() {
        return this.cells.size();
    }

    /** @return the backing list of cells (not a copy) */
    public List<Cell> getRow() {
        return this.cells;
    }

    /**
     * Replaces the cells of this row with the given list (despite the name,
     * nothing is appended).
     *
     * @param row list that becomes the row's cells
     */
    public void addRow(List<Cell> row) {
        this.cells = row;
    }
}

View File

@@ -0,0 +1,26 @@
package it.noah.crawler.dom.tag.builder;
import it.noah.crawler.dom.tag.Table;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class TableBuilder {
public Table build(Element element, boolean headerFlag) {
Table table = new Table();
Elements elements = element.select("tr");
Element header;
if (headerFlag) {
header = elements.select("tr").first();
header.remove();
TableHeaderBuilder thb = new TableHeaderBuilder();
table.setHeader(thb.build(header));
}
Elements rows = elements.select("tr");
TableRowBuilder thr = new TableRowBuilder();
// table.setBody(thr.build(rows)); TODO
return table;
}
}

View File

@@ -0,0 +1,12 @@
package it.noah.crawler.dom.tag.builder;
import it.noah.crawler.dom.tag.TableHeader;
import org.jsoup.nodes.Element;
public class TableHeaderBuilder {
public TableHeader build(Element element) {
return null;
}
}

View File

@@ -0,0 +1,22 @@
package it.noah.crawler.dom.tag.builder;
import it.noah.crawler.dom.tag.Cell;
import it.noah.crawler.dom.tag.TableRow;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Builds a {@link TableRow} from jsoup row elements.
 */
public class TableRowBuilder {

    /**
     * Creates one cell for each {@code td} element found under the given
     * elements, in document order, and collects them into a single row.
     *
     * @param rows row elements to read; may be {@code null}
     * @return the row with its cells; empty when {@code rows} is {@code null}
     *         or contains no {@code td}
     */
    public TableRow build(Elements rows) {
        TableRow tableRow = new TableRow();
        if (rows == null) {
            return tableRow;
        }
        for (Element row : rows) {
            // One Cell per <td>: calling text() on the whole selection would
            // merge all the values of the row into a single string.
            for (Element td : row.select("td")) {
                Cell cell = new Cell();
                cell.setValue(td.text());
                tableRow.getRow().add(cell);
            }
        }
        return tableRow;
    }
}

View File

@@ -0,0 +1,5 @@
package it.noah.crawler.enums;
/**
 * Identifies the available crawler kinds.
 */
public enum CrawlerEnum {

    /** Crawler for provinces. */
    PROVINCIA_CRAWLER,

    /** Crawler for municipalities. */
    COMUNE_CRAWLER;
}

View File

@@ -0,0 +1,17 @@
package it.noah.crawler.enums;
/**
 * Addresses of the pages read by the crawlers.
 */
public enum UrlEnum {

    /** Page used as the source for provinces. */
    PROVINCE_URL(
            "http://www.aci.it/i-servizi/normative/codice-della-strada/elenco-sigle-province-ditalia.html"),

    /** Page used as the source for municipalities. */
    COMUNI_URL("https://it.wikipedia.org/wiki/Citt%C3%A0_d'Italia");

    /** Address bound to the constant; final because enum constants are shared singletons. */
    private final String url;

    UrlEnum(String url) {
        this.url = url;
    }

    /** @return the address bound to this constant */
    public String getUrl() {
        return this.url;
    }
}

View File

@@ -0,0 +1,18 @@
package it.noah.crawler.exception;
/**
 * Checked exception signalling that a crawl could not be completed.
 */
public class NoahCrawlerException extends Exception {

    private static final long serialVersionUID = 1L;

    /**
     * @param message description of the failure
     */
    public NoahCrawlerException(String message) {
        // Delegate to Exception instead of shadowing its message in a private
        // field: getMessage() and toString() then work without overrides.
        super(message);
    }

    /**
     * @param message description of the failure
     * @param cause   underlying failure, preserved for diagnostics
     */
    public NoahCrawlerException(String message, Throwable cause) {
        super(message, cause);
    }
}

View File

@@ -0,0 +1,18 @@
package it.noah.crawler.exception;
/**
 * Checked exception signalling that a requested object is not available.
 */
public class ObjectNotFoundException extends Exception {

    private static final long serialVersionUID = -4004459574648431770L;

    /**
     * @param message description of what could not be found
     */
    public ObjectNotFoundException(String message) {
        // Delegate to Exception instead of shadowing its message in a private
        // field: getMessage() and toString() then work without overrides.
        super(message);
    }

    /**
     * @param message description of what could not be found
     * @param cause   underlying failure, preserved for diagnostics
     */
    public ObjectNotFoundException(String message, Throwable cause) {
        super(message, cause);
    }
}

View File

@@ -0,0 +1,21 @@
package it.noah.crawler.factory;
import it.noah.crawler.NoahCrawler;
import it.noah.crawler.enums.CrawlerEnum;
import it.noah.crawler.impl.ComuneNoahCrawler;
import it.noah.crawler.impl.ProvinciaNoahCrawler;
import java.io.IOException;
/**
 * Factory mapping a {@link CrawlerEnum} constant to its crawler implementation.
 */
public class NoahCrawlerFactory {

    /**
     * Creates the crawler associated with {@code crawler}.
     *
     * @param crawler kind of crawler wanted
     * @return a new crawler, or {@code null} when {@code crawler} matches no
     *         known kind (including {@code null})
     * @throws IOException declared by the crawler constructors
     */
    public static NoahCrawler getInstance(CrawlerEnum crawler) throws IOException {
        if (CrawlerEnum.PROVINCIA_CRAWLER == crawler) {
            return new ProvinciaNoahCrawler();
        }
        if (CrawlerEnum.COMUNE_CRAWLER == crawler) {
            return new ComuneNoahCrawler();
        }
        return null;
    }
}

View File

@@ -0,0 +1,36 @@
package it.noah.crawler.impl;
import it.noah.crawler.AbstractNoahCrawler;
import it.noah.crawler.NoahCrawler;
import it.noah.crawler.converter.impl.ComuneConverter;
import it.noah.crawler.dom.tag.Table;
import it.noah.crawler.enums.UrlEnum;
import it.noah.crawler.exception.NoahCrawlerException;
import it.noah.crawler.exception.ObjectNotFoundException;
import it.noah.crawler.model.Comune;
import java.io.IOException;
import java.util.List;
/**
 * Crawler reading the municipalities table from {@link UrlEnum#COMUNI_URL}.
 */
public class ComuneNoahCrawler extends AbstractNoahCrawler implements
        NoahCrawler {

    /**
     * Creates the crawler for the municipalities page.
     *
     * @throws IOException kept in the signature for existing callers
     */
    public ComuneNoahCrawler() throws IOException {
        super(UrlEnum.COMUNI_URL.getUrl());
    }

    /**
     * Reads the table and converts it into {@link Comune} objects.
     *
     * @throws NoahCrawlerException if the table cannot be found; the original
     *         {@link ObjectNotFoundException} is attached as the cause
     */
    @Override
    public void run() throws NoahCrawlerException {
        try {
            List<Comune> comuni = convertTable2Comuni(getTable("", true));
            // TODO once the list has been built it must be saved to the db
        } catch (ObjectNotFoundException e) {
            // Surface the failure through the declared exception instead of
            // printing it and reporting success to the caller.
            NoahCrawlerException failure = new NoahCrawlerException(
                    "Impossibile leggere la tabella dei comuni: " + e.getMessage());
            failure.initCause(e);
            throw failure;
        }
    }

    /** Converts the table through a {@link ComuneConverter}. */
    private List<Comune> convertTable2Comuni(Table table) {
        return new ComuneConverter().convertTable(table);
    }
}

View File

@@ -0,0 +1,36 @@
package it.noah.crawler.impl;
import it.noah.crawler.AbstractNoahCrawler;
import it.noah.crawler.NoahCrawler;
import it.noah.crawler.converter.impl.ProvinciaConverter;
import it.noah.crawler.dom.tag.Table;
import it.noah.crawler.enums.UrlEnum;
import it.noah.crawler.exception.NoahCrawlerException;
import it.noah.crawler.exception.ObjectNotFoundException;
import it.noah.crawler.model.Provincia;
import java.io.IOException;
import java.util.List;
/**
 * Crawler reading the provinces table from {@link UrlEnum#PROVINCE_URL}.
 */
public class ProvinciaNoahCrawler extends AbstractNoahCrawler implements
        NoahCrawler {

    /**
     * Creates the crawler for the provinces page.
     *
     * @throws IOException kept in the signature for existing callers
     */
    public ProvinciaNoahCrawler() throws IOException {
        super(UrlEnum.PROVINCE_URL.getUrl());
    }

    /**
     * Reads the table and converts it into {@link Provincia} objects.
     *
     * @throws NoahCrawlerException if the table cannot be found; the original
     *         {@link ObjectNotFoundException} is attached as the cause
     */
    @Override
    public void run() throws NoahCrawlerException {
        try {
            List<Provincia> province = convertTable2Provincia(getTable("", true));
            // TODO once the list has been built it must be saved to the db
        } catch (ObjectNotFoundException e) {
            // Surface the failure through the declared exception instead of
            // printing it and reporting success to the caller.
            NoahCrawlerException failure = new NoahCrawlerException(
                    "Impossibile leggere la tabella delle province: " + e.getMessage());
            failure.initCause(e);
            throw failure;
        }
    }

    /** Converts the table through a {@link ProvinciaConverter}. */
    private List<Provincia> convertTable2Provincia(Table table) {
        return new ProvinciaConverter().convertTable(table);
    }
}

View File

@@ -0,0 +1,65 @@
package it.noah.crawler.model;
import it.noah.crawler.annotation.Column;
import it.noah.crawler.annotation.NoahCrawlerModel;
import it.noah.crawler.annotation.Transiet;
@NoahCrawlerModel
public class Comune {
@Transiet
private Long id;
@Column(name = "")
private String nome;
@Column(name = "")
private String cap;
@Column(name = "")
private Provincia provincia;
@Column(name = "")
private String regione;
public void setId(Long id) {
this.id = id;
}
public Long getId() {
return id;
}
public String getNome() {
return nome;
}
public void setNome(String nome) {
this.nome = nome;
}
public String getCap() {
return cap;
}
public void setCap(String cap) {
this.cap = cap;
}
public Provincia getProvincia() {
return provincia;
}
public void setProvincia(Provincia provincia) {
this.provincia = provincia;
}
public void setRegione(String regione) {
this.regione = regione;
}
public String getRegione() {
return regione;
}
}

View File

@@ -0,0 +1,67 @@
package it.noah.crawler.model;
import it.noah.crawler.annotation.Column;
import it.noah.crawler.annotation.NoahCrawlerModel;
import it.noah.crawler.annotation.Transiet;
import java.util.List;
@NoahCrawlerModel
public class Provincia {
@Transiet
private Long id;
@Column(name = "Provincia")
private String nome;
@Column(name = "Sigla")
private String sigla;
@Column(name = "Regione")
private String regione;
@Transiet
private List<Comune> comuni;
public void setId(Long id) {
this.id = id;
}
public Long getId() {
return id;
}
public String getNome() {
return nome;
}
public void setNome(String nome) {
this.nome = nome;
}
public String getSigla() {
return sigla;
}
public void setSigla(String sigla) {
this.sigla = sigla;
}
public void setComuni(List<Comune> comuni) {
this.comuni = comuni;
}
public List<Comune> getComuni() {
return comuni;
}
public void setRegione(String regione) {
this.regione = regione;
}
public String getRegione() {
return regione;
}
}