From 7c6b961bf263a406112c81351296b89d1c2e949c Mon Sep 17 00:00:00 2001 From: Fabio Scotto di Santolo Date: Wed, 4 Jan 2017 12:11:04 +0100 Subject: [PATCH] ProvinciaCrawler completato --- NoahCrawler/.classpath | 2 +- NoahCrawler/.gitignore | 1 + NoahCrawler/pom.xml | 5 + .../crawler/impl/ProvinciaNoahCrawler.java | 7 +- .../src/it/noah/crawler/io/DirFilter.java | 13 ++ .../src/it/noah/crawler/io/FileUtils.java | 189 ++++++++++++++++++ .../crawler/persistence/AccessProvincia.java | 51 +++++ .../persistence/ConnectionFactory.java | 18 ++ 8 files changed, 284 insertions(+), 2 deletions(-) create mode 100644 NoahCrawler/src/it/noah/crawler/io/DirFilter.java create mode 100644 NoahCrawler/src/it/noah/crawler/io/FileUtils.java create mode 100644 NoahCrawler/src/it/noah/crawler/persistence/AccessProvincia.java create mode 100644 NoahCrawler/src/it/noah/crawler/persistence/ConnectionFactory.java diff --git a/NoahCrawler/.classpath b/NoahCrawler/.classpath index 28da404..811218f 100644 --- a/NoahCrawler/.classpath +++ b/NoahCrawler/.classpath @@ -16,7 +16,7 @@ - + diff --git a/NoahCrawler/.gitignore b/NoahCrawler/.gitignore index 65313a4..4457296 100644 --- a/NoahCrawler/.gitignore +++ b/NoahCrawler/.gitignore @@ -1,3 +1,4 @@ /build/ /target/ /Sviluppo/ +/properties.txt diff --git a/NoahCrawler/pom.xml b/NoahCrawler/pom.xml index b9e1de3..71583a6 100644 --- a/NoahCrawler/pom.xml +++ b/NoahCrawler/pom.xml @@ -40,5 +40,10 @@ slf4j-simple 1.7.22 + + mysql + mysql-connector-java + 5.1.40 + \ No newline at end of file diff --git a/NoahCrawler/src/it/noah/crawler/impl/ProvinciaNoahCrawler.java b/NoahCrawler/src/it/noah/crawler/impl/ProvinciaNoahCrawler.java index d0dd9aa..e106b50 100644 --- a/NoahCrawler/src/it/noah/crawler/impl/ProvinciaNoahCrawler.java +++ b/NoahCrawler/src/it/noah/crawler/impl/ProvinciaNoahCrawler.java @@ -1,6 +1,7 @@ package it.noah.crawler.impl; import java.io.IOException; +import java.sql.SQLException; import java.util.List; import org.slf4j.Logger; @@ 
-14,6 +15,7 @@ import it.noah.crawler.enums.UrlEnum; import it.noah.crawler.exception.NoahCrawlerException; import it.noah.crawler.exception.ObjectNotFoundException; import it.noah.crawler.model.Provincia; +import it.noah.crawler.persistence.AccessProvincia; public class ProvinciaNoahCrawler extends AbstractNoahCrawler implements NoahCrawler { @@ -31,9 +33,12 @@ public class ProvinciaNoahCrawler extends AbstractNoahCrawler List province = convertTable2Provincia( getTable("", true)); log.debug("Provincia convertita con successo!!!"); - // TODO una volta creata la lista va salvata sul db + /* Store the converted list through AccessProvincia.insertProvince. */ AccessProvincia access = new AccessProvincia(); + access.insertProvince(province); } catch (ObjectNotFoundException e) { log.error(e.getMessage()); + } catch (SQLException e) { + /* NOTE(review): only e.getMessage() is logged here; the exception object itself is not handed to the logger. */ log.error(e.getMessage()); } } diff --git a/NoahCrawler/src/it/noah/crawler/io/DirFilter.java b/NoahCrawler/src/it/noah/crawler/io/DirFilter.java new file mode 100644 index 0000000..b2b1625 --- /dev/null +++ b/NoahCrawler/src/it/noah/crawler/io/DirFilter.java @@ -0,0 +1,13 @@ +package it.noah.crawler.io; + +import java.io.File; +import java.io.FilenameFilter; + +/** Filename filter that accepts a name only when new File(dir, name) is a directory. */ public class DirFilter implements FilenameFilter { + + @Override + public boolean accept(File dir, String name) { + return new File(dir, name).isDirectory(); + } + +} diff --git a/NoahCrawler/src/it/noah/crawler/io/FileUtils.java b/NoahCrawler/src/it/noah/crawler/io/FileUtils.java new file mode 100644 index 0000000..7883655 --- /dev/null +++ b/NoahCrawler/src/it/noah/crawler/io/FileUtils.java @@ -0,0 +1,189 @@ +package it.noah.crawler.io; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.FileReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.io.OutputStream; +import java.io.PushbackInputStream; +import java.io.Reader; +import java.util.ArrayList; +import java.util.List; +import
java.util.Properties; +import java.util.StringTokenizer; + +/** + * Class for handling files + * + * @author Fabio Scotto di Santolo + * @since 22/12/2016 + */ +public class FileUtils { + + /** Counts the bytes of the file at path by reading it one byte at a time until read() returns -1; the stream is closed in the finally block. Returns the number of bytes read, accumulated in an int. */ public static int countBytes(String path) throws IOException { + InputStream in = new FileInputStream(path); + int total = 0; + + try { + while (in.read() != -1) { + total++; + } + } finally { + if (in != null) { + in.close(); + } + } + + return total; + } + + /** Copies in to out byte by byte, writing the first character of args[1] wherever the byte read equals the first character of args[0]; both streams are closed before returning. NOTE(review): read() yields an int while from is a byte, so for a first character above 0x7F the cast makes from negative and the comparison cannot match; confirm that only ASCII is expected. */ public static void translateByte(InputStream in, OutputStream out, + String[] args) throws IOException { + byte from = (byte) args[0].charAt(0); + byte to = (byte) args[1].charAt(0); + int b = 0; + + try { + while ((b = in.read()) != -1) { + out.write(b == from ? to : b); + } + } finally { + if (in != null) { + in.close(); + } + if (out != null) { + out.close(); + } + } + } + + /** Counts the characters of the file at path for which Character.isWhitespace is true, reading through a FileReader that is closed in the finally block. NOTE(review): the loop counter i is incremented but never read. */ public static int countSpace(String path) throws IOException { + Reader in = new FileReader(path); + int ch = 0; + int spaces = 0; + + try { + for (int i = 0; (ch = in.read()) != -1; i++) { + if (Character.isWhitespace((char) ch)) { + spaces++; + } + } + } finally { + if (in != null) { + in.close(); + } + } + + return spaces; + } + + /** Copies the file src to dest one byte at a time and closes both streams in the finally block. NOTE(review): both streams are constructed before the try block and before file.exists() is evaluated, so a failure in the FileOutputStream constructor would leave the input stream open. */ public static void copy(String src, String dest) throws IOException { + File file = new File(src); + File copy = new File(dest); + InputStream in = new FileInputStream(file); + OutputStream out = new FileOutputStream(copy); + int c = 0; + + try { + if (file.exists()) { + while ((c = in.read()) != -1) { + out.write(c); + } + } + } finally { + if (in != null) { + in.close(); + } + if (out != null) { + out.close(); + } + } + } + + /** Reads the file at path line by line (a line ends at '\n') and splits every line on the delimiter characters with StringTokenizer; the tokens of all lines are returned in one flat list. */ public static List tokens(String path, String delimiter) + throws IOException { + File file = new File(path); + InputStream in = new FileInputStream(file); + PushbackInputStream pis = new PushbackInputStream(in); + List line = new ArrayList(); + int c = 0; + + try { + if (file.exists()) { + while ((c = pis.read()) != -1) { + String str = new String(); + pis.unread(c); + while ((c =
pis.read()) != '\n') { + /* NOTE(review): this inner loop stops only on '\n'; read() returns -1 at end of stream, which also differs from '\n', so a last line without a trailing newline keeps appending (char) -1 and never exits. */ str += (char) c; + } + + StringTokenizer token = new StringTokenizer(str, delimiter); + + while (token.hasMoreTokens()) { + line.add(token.nextToken()); + } + } + } + } finally { + if (in != null) { + in.close(); + } + } + + return line; + } + + /** Builds a Properties object from the file at path: the file is tokenized with "=" as delimiter and consecutive tokens are stored as key/value pairs. NOTE(review): the value at index i+1 is fetched whenever i is below list.size(), so an odd number of tokens ends in an IndexOutOfBoundsException. */ public static Properties getProperties(String path) throws IOException { + Properties prop = new Properties(); + List list = tokens(path, "="); + int i = 0; + + while (i < list.size()) { + prop.setProperty(list.get(i), list.get(i + 1)); + i = i + 2; + } + + return prop; + } + + /** Returns the entries of dirPath that DirFilter accepts, as produced by File.list. */ public static String[] fileNameFilter(String dirPath) { + File dir = new File(dirPath); + return dir.list(new DirFilter()); + } + + /** Writes obj to the file at path with an ObjectOutputStream, which is closed in the finally block. */ public static void serialize(Object obj, String path) throws IOException { + FileOutputStream fileOut = new FileOutputStream(path); + ObjectOutputStream out = new ObjectOutputStream(fileOut); + + try { + out.writeObject(obj); + } finally { + if (out != null) { + out.close(); + } + } + } + + /** Reads one object from the file at path with an ObjectInputStream and returns it after an unchecked cast to T. NOTE(review): the type argument is not used in the body, and the input stream variable is named out. */ @SuppressWarnings("unchecked") + public static T deserialize(String path, Class type) + throws IOException, ClassNotFoundException { + FileInputStream fileIn = new FileInputStream(path); + ObjectInputStream out = new ObjectInputStream(fileIn); + T ret = null; + + try { + ret = (T) out.readObject(); + } finally { + if (out != null) { + out.close(); + } + } + + return ret; + } + +} diff --git a/NoahCrawler/src/it/noah/crawler/persistence/AccessProvincia.java b/NoahCrawler/src/it/noah/crawler/persistence/AccessProvincia.java new file mode 100644 index 0000000..92c2a99 --- /dev/null +++ b/NoahCrawler/src/it/noah/crawler/persistence/AccessProvincia.java @@ -0,0 +1,51 @@ +package it.noah.crawler.persistence; + +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.List; + +import it.noah.crawler.model.Provincia; + +public class AccessProvincia { + + /** Inserts every Provincia of the list into table province inside one transaction: for each element the id of its region is looked up in table regioni by name and used as idRegione. The connection is committed after the loop and closed in the finally block. NOTE(review): the catch block rolls back but neither logs nor rethrows, so a failed run returns normally to the caller; the boolean result of regione.next() is ignored; the regioni statement is never closed and both statements are prepared anew on every iteration. */ public void insertProvince(List province) throws SQLException { + Connection conn
= null; + PreparedStatement regioni = null; + PreparedStatement stmt = null; + ResultSet regione = null; + + try { + conn = ConnectionFactory.getInstance(); + /* Auto-commit is switched off so that all inserts are committed together below. */ conn.setAutoCommit(false); + for (Provincia provincia : province) { + regioni = conn.prepareStatement( + "select id from regioni where nome = ?"); + regioni.setString(1, provincia.getRegione()); + regione = regioni.executeQuery(); + regione.next(); + stmt = conn.prepareStatement( + "insert into province (idRegione, nome, sigla) values (?, ?, ?)"); + stmt.setInt(1, regione.getInt("id")); + stmt.setString(2, provincia.getNome()); + stmt.setString(3, provincia.getSigla()); + stmt.executeUpdate(); + if (stmt != null && regione != null) { + regione.close(); + stmt.close(); + } + } + conn.commit(); + } catch (Exception e) { + if (conn != null) { + conn.rollback(); + } + } finally { + if (conn != null) { + conn.close(); + } + } + } + +} diff --git a/NoahCrawler/src/it/noah/crawler/persistence/ConnectionFactory.java b/NoahCrawler/src/it/noah/crawler/persistence/ConnectionFactory.java new file mode 100644 index 0000000..3c743de --- /dev/null +++ b/NoahCrawler/src/it/noah/crawler/persistence/ConnectionFactory.java @@ -0,0 +1,18 @@ +package it.noah.crawler.persistence; + +import java.io.IOException; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.SQLException; + +public class ConnectionFactory { + + /** Loads com.mysql.jdbc.Driver and returns the result of DriverManager.getConnection for the hard-coded URL, user and password; every call asks DriverManager for a connection. NOTE(review): the credentials are hard-coded in source, and nothing in this body appears to throw the declared IOException; confirm whether the declaration is still needed. */ public static Connection getInstance() + throws ClassNotFoundException, SQLException, IOException { + Class.forName("com.mysql.jdbc.Driver"); + Connection conn = DriverManager.getConnection( + "jdbc:mysql://localhost:3306/noah_d10", "root", "root"); + return conn; + } + +}