-
Notifications
You must be signed in to change notification settings - Fork 3
Part five #5
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
oleksiipet
wants to merge
8
commits into
hyperskill:master
Choose a base branch
from
oleksiipet:part-five
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Part five #5
Changes from all commits
Commits
Show all changes
8 commits
Select commit
Hold shift + click to select a range
240c7e2
first phase frame implement
opetenko fcc75f5
second phase frame implement
opetenko 4072d7d
third phase frame implementation
opetenko 56308c5
third phase frame implementation
opetenko 077f6d6
fourth phase frame implementation
opetenko 71adb15
fourth phase frame implementation
opetenko 61510c1
fifth phase frame implementation
opetenko ee09f62
fifth phase frame implementation fixed review comments
opetenko File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,7 +1,13 @@ | ||
| package webcrawler; | ||
|
|
||
| import webcrawler.crawl.HtmlPageParser; | ||
| import webcrawler.source.PageSourceReader; | ||
|
|
||
| public class ApplicationRunner { | ||
| public static void main(String[] args) { | ||
| new WebCrawler(); | ||
| } | ||
|
|
||
| public static void main(String[] args) { | ||
| HtmlPageParser pageParser = new HtmlPageParser(); | ||
| PageSourceReader pageSourceReader = new PageSourceReader(); | ||
| new WebCrawlerWindow(pageParser, pageSourceReader); | ||
| } | ||
| } |
This file was deleted.
Oops, something went wrong.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,121 @@ | ||
| package webcrawler; | ||
|
|
||
| import java.awt.ComponentOrientation; | ||
| import java.awt.Dimension; | ||
| import java.awt.FlowLayout; | ||
| import java.nio.file.Paths; | ||
| import javax.swing.BorderFactory; | ||
| import javax.swing.BoxLayout; | ||
| import javax.swing.JButton; | ||
| import javax.swing.JFrame; | ||
| import javax.swing.JLabel; | ||
| import javax.swing.JPanel; | ||
| import javax.swing.JScrollPane; | ||
| import javax.swing.JTable; | ||
| import javax.swing.JTextField; | ||
| import javax.swing.SwingUtilities; | ||
| import javax.swing.table.DefaultTableModel; | ||
| import webcrawler.crawl.HtmlPageParser; | ||
| import webcrawler.source.PageSourceReader; | ||
| import webcrawler.workers.ExportTableWorker; | ||
| import webcrawler.workers.PageLoadWorker; | ||
|
|
||
| public class WebCrawlerWindow extends JFrame { | ||
|
|
||
| private final JTable table; | ||
| private final JTextField location; | ||
| private final JButton goButton; | ||
| private final JLabel titleLabelInfo; | ||
| private final JLabel titleLabel; | ||
| private final JLabel urlLabel; | ||
| private final JLabel exportLabel; | ||
| private final JTextField exportLocation; | ||
| private final JButton exportButton; | ||
|
|
||
| private final DefaultTableModel tableModel; | ||
|
|
||
| private final PageSourceReader pageReader; | ||
| private final HtmlPageParser pageParser; | ||
|
|
||
| public WebCrawlerWindow(HtmlPageParser pageParser, PageSourceReader pageReader) { | ||
| this.pageParser = pageParser; | ||
| this.pageReader = pageReader; | ||
| setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); | ||
| setSize(600, 600); | ||
|
|
||
| this.urlLabel = new JLabel("URL: "); | ||
| this.location = new JTextField("https://wikipedia.org"); | ||
| this.goButton = new JButton("Parse"); | ||
| this.table = new JTable(); | ||
| this.tableModel = new DefaultTableModel(new String[]{"URL", "Title"}, 0); | ||
| this.titleLabelInfo = new JLabel("Title: "); | ||
| this.titleLabel = new JLabel(); | ||
| this.exportLabel = new JLabel("Export: "); | ||
| this.exportLocation = new JTextField("export/links.txt"); | ||
| this.exportButton = new JButton("Save"); | ||
|
|
||
| initLayout(); | ||
| initActions(); | ||
| setVisible(true); | ||
| } | ||
|
|
||
| private void initActions() { | ||
| goButton.addActionListener(e -> | ||
| SwingUtilities.invokeLater(() -> { | ||
| String url = location.getText(); | ||
| PageLoadWorker pageLoadWorker = new PageLoadWorker(url, titleLabel, tableModel, | ||
| pageReader, | ||
| pageParser); | ||
| pageLoadWorker.execute(); | ||
| })); | ||
|
|
||
| exportButton.addActionListener(e -> | ||
| SwingUtilities.invokeLater(() -> { | ||
| String file = exportLocation.getText(); | ||
| ExportTableWorker pageLoadWorker = new ExportTableWorker(Paths.get(file), tableModel); | ||
| pageLoadWorker.execute(); | ||
| })); | ||
| } | ||
|
|
||
| private void initLayout() { | ||
| var rootPanel = getContentPane(); | ||
|
|
||
| var locationPanel = new JPanel(); | ||
| locationPanel.setLayout(new BoxLayout(locationPanel, BoxLayout.LINE_AXIS)); | ||
| locationPanel.setBorder(BorderFactory.createEmptyBorder(5, 5, 5, 5)); | ||
| locationPanel.add(urlLabel); | ||
| locationPanel.add(location); | ||
| locationPanel.add(goButton); | ||
|
|
||
| table.setModel(tableModel); | ||
| var titlePanel = new JPanel(); | ||
| titlePanel.setLayout(new FlowLayout(FlowLayout.LEFT)); | ||
| titlePanel.setComponentOrientation(ComponentOrientation.LEFT_TO_RIGHT); | ||
| titlePanel.setBorder(BorderFactory.createEmptyBorder(5, 5, 5, 5)); | ||
| titlePanel.add(titleLabelInfo); | ||
| titlePanel.add(titleLabel); | ||
|
|
||
| var scrollPane = new JScrollPane(table); | ||
| scrollPane.setPreferredSize(new Dimension(400, 400)); | ||
|
|
||
| var areaPanel = new JPanel(); | ||
| areaPanel.setLayout(new BoxLayout(areaPanel, BoxLayout.PAGE_AXIS)); | ||
| areaPanel.setBorder(BorderFactory.createEmptyBorder(10, 10, 10, 10)); | ||
| areaPanel.add(scrollPane); | ||
|
|
||
| var exportPanel = new JPanel(); | ||
| exportPanel.setLayout(new BoxLayout(exportPanel, BoxLayout.LINE_AXIS)); | ||
| exportPanel.setBorder(BorderFactory.createEmptyBorder(5, 5, 5, 5)); | ||
| exportPanel.add(exportLabel); | ||
| exportPanel.add(exportLocation); | ||
| exportPanel.add(exportButton); | ||
|
|
||
| rootPanel.setLayout(new BoxLayout(rootPanel, BoxLayout.PAGE_AXIS)); | ||
| rootPanel.add(locationPanel); | ||
| rootPanel.add(titlePanel); | ||
| rootPanel.add(areaPanel); | ||
| rootPanel.add(exportPanel); | ||
|
|
||
| pack(); | ||
| } | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,22 @@ | ||
| package webcrawler.crawl; | ||
|
|
||
| import java.util.Set; | ||
|
|
||
/**
 * Immutable result of parsing one HTML page: its title and the set of links found in it.
 */
public class Html {

  private final String title;
  private final Set<String> links;

  /**
   * Creates a parse result.
   *
   * @param title page title; stored as given
   * @param links links found on the page; copied, so later changes to the argument do not
   *     show through this object. {@code null} is treated as "no links".
   */
  Html(String title, Set<String> links) {
    this.title = title;
    // A LinkedHashSet copy keeps the iteration order of the set that was passed in; the
    // unmodifiable wrapper stops callers of getLinks() from changing this object's state.
    this.links = links == null
        ? Set.of()
        : java.util.Collections.unmodifiableSet(new java.util.LinkedHashSet<>(links));
  }

  /**
   * @return the page title exactly as passed to the constructor
   */
  public String getTitle() {
    return title;
  }

  /**
   * @return an unmodifiable set of the page's links; never {@code null}
   */
  public Set<String> getLinks() {
    return links;
  }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,28 @@ | ||
| package webcrawler.crawl; | ||
|
|
||
| import java.util.Set; | ||
|
|
||
| public class HtmlBuilder { | ||
|
|
||
| private String title; | ||
| private Set<String> links; | ||
|
|
||
| HtmlBuilder() { | ||
| title = "no title"; | ||
| links = Set.of(); | ||
| } | ||
|
|
||
| HtmlBuilder withTitle(String title) { | ||
| this.title = title; | ||
| return this; | ||
| } | ||
|
|
||
| HtmlBuilder withLinks(Set<String> links) { | ||
| this.links = links; | ||
| return this; | ||
| } | ||
|
|
||
| Html build() { | ||
| return new Html(title, links); | ||
| } | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,57 @@ | ||
| package webcrawler.crawl; | ||
|
|
||
| import java.util.HashSet; | ||
| import java.util.regex.Matcher; | ||
| import java.util.regex.Pattern; | ||
|
|
||
| public class HtmlPageParser { | ||
|
|
||
| private Pattern titlePattern; | ||
| private Pattern linkPattern; | ||
|
|
||
| public HtmlPageParser() { | ||
| this.titlePattern = Pattern.compile("<title>(.*)</title>"); | ||
| this.linkPattern = Pattern.compile("<a\\s+(?:[^>]*?\\s+)?href=([\"'])(.*?)\\1"); | ||
| } | ||
|
|
||
| public Html parse(String siteText, String rootHost) { | ||
| HtmlBuilder htmlBuilder = new HtmlBuilder(); | ||
|
|
||
| buildTitle(htmlBuilder, siteText); | ||
| buildLinks(htmlBuilder, siteText, rootHost); | ||
|
|
||
| return htmlBuilder.build(); | ||
| } | ||
|
|
||
| private void buildLinks(HtmlBuilder htmlBuilder, String siteText, String rootHost) { | ||
| Matcher linksMatcher = linkPattern.matcher(siteText); | ||
| boolean found = linksMatcher.find(); | ||
| HashSet<String> links = new HashSet<>(); | ||
| while (found) { | ||
| String group = linksMatcher.group(2); | ||
| String link = normalize(group, rootHost); | ||
| links.add(link); | ||
| found = linksMatcher.find(); | ||
| } | ||
| htmlBuilder.withLinks(links); | ||
| } | ||
|
|
||
| private void buildTitle(HtmlBuilder htmlBuilder, String siteText) { | ||
| Matcher titleMatcher = titlePattern.matcher(siteText); | ||
| boolean found = titleMatcher.find(); | ||
| if (found) { | ||
| htmlBuilder.withTitle(titleMatcher.group(1)); | ||
| } | ||
| } | ||
|
|
||
| private String normalize(String link, String rootHost) { | ||
| if (link.startsWith("http://") || link.startsWith("https://")) { | ||
| return link; | ||
| } else if (link.startsWith("//")) { | ||
| String protocol = rootHost.startsWith("https") ? "https:" : "http:"; | ||
| return protocol + link; | ||
| } else { | ||
| return rootHost + "/" + link; | ||
| } | ||
| } | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,59 @@ | ||
| package webcrawler.source; | ||
|
|
||
| import java.io.BufferedReader; | ||
| import java.io.IOException; | ||
| import java.io.InputStream; | ||
| import java.io.InputStreamReader; | ||
| import java.net.URL; | ||
| import java.net.URLConnection; | ||
|
|
||
/**
 * Fetches pages over {@link URLConnection}, sending a browser-like {@code User-Agent} header.
 *
 * <p>Both public methods report failures by printing to standard output and returning a
 * neutral value ({@code false} / empty string) instead of throwing.
 */
public class PageSourceReader {

  private static final String LINE_SEPARATOR = System.getProperty("line.separator");
  private static final String TEXT_HTML = "text/html";
  public static final String USER_AGENT_MOZILLA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:63.0) Gecko/20100101 Firefox/63.0";

  /**
   * Tells whether the resource behind {@code url} reports an HTML content type.
   *
   * @param url address to probe
   * @return {@code true} when the content type contains {@code text/html}; {@code false} when
   *     it does not, is missing, or the connection cannot be opened
   */
  public boolean isHtml(String url) {
    try {
      String contentType = openConnection(url).getContentType();
      return contentType != null && contentType.contains(TEXT_HTML);
    } catch (IOException e) {
      System.out.println("Error " + e);
      return false;
    }
  }

  /**
   * Downloads the resource behind {@code url} as text.
   *
   * @param url address to read
   * @return the content with every line terminated by the platform line separator, or an
   *     empty string when the URL is malformed or reading fails
   */
  public String readPageSource(String url) {
    try {
      return readFromConnection(openConnection(url));
    } catch (IOException e) {
      // Include the exception so the reason for the failure is not lost.
      System.out.println("Error on parse page " + url + ": " + e);
      return "";
    }
  }

  /** Opens a connection to {@code url} with the browser-like User-Agent header set. */
  private URLConnection openConnection(String url) throws IOException {
    URLConnection urlConnection = new URL(url).openConnection();
    urlConnection.setRequestProperty("User-Agent", USER_AGENT_MOZILLA);
    return urlConnection;
  }

  /**
   * Reads the whole response line by line.
   *
   * <p>The bytes are decoded as UTF-8 rather than with the platform default charset, so the
   * result does not depend on the machine the crawler runs on. I/O errors propagate to the
   * caller, which logs them.
   */
  private String readFromConnection(URLConnection urlConnection) throws IOException {
    try (
        InputStream inputStream = urlConnection.getInputStream();
        BufferedReader reader = new BufferedReader(
            new InputStreamReader(inputStream, java.nio.charset.StandardCharsets.UTF_8))
    ) {
      final StringBuilder stringBuilder = new StringBuilder();
      String nextLine;
      while ((nextLine = reader.readLine()) != null) {
        stringBuilder.append(nextLine);
        stringBuilder.append(LINE_SEPARATOR);
      }
      return stringBuilder.toString();
    }
  }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,32 @@ | ||
| package webcrawler.workers; | ||
|
|
||
| import java.io.BufferedWriter; | ||
| import java.io.PrintWriter; | ||
| import java.nio.file.Files; | ||
| import java.nio.file.Path; | ||
| import javax.swing.SwingWorker; | ||
| import javax.swing.table.DefaultTableModel; | ||
|
|
||
/**
 * Background task that writes the first two columns of a table model to a text file, one
 * value per line: column 0 of a row, then column 1 of the same row, then the next row.
 */
public class ExportTableWorker extends SwingWorker<Void, Void> {

  private final Path exportPath;
  private final DefaultTableModel tableModel;

  /**
   * @param exportPath file to write; missing parent directories are created
   * @param tableModel model whose rows are exported
   */
  public ExportTableWorker(Path exportPath, DefaultTableModel tableModel) {
    this.exportPath = exportPath;
    this.tableModel = tableModel;
  }

  /**
   * Writes the table to {@code exportPath}.
   *
   * @return always {@code null}
   * @throws Exception if the directories or the file cannot be created or written
   */
  @Override
  protected Void doInBackground() throws Exception {
    // A bare file name such as "links.txt" has no parent; there is nothing to create then.
    Path parent = exportPath.getParent();
    if (parent != null) {
      Files.createDirectories(parent);
    }
    // Write through the BufferedWriter directly so that I/O errors surface as exceptions
    // instead of being swallowed by a PrintWriter.
    try (BufferedWriter writer = Files.newBufferedWriter(exportPath)) {
      int rowCount = tableModel.getRowCount();
      for (int row = 0; row < rowCount; row++) {
        writer.write(String.valueOf(tableModel.getValueAt(row, 0)));
        writer.write('\n');
        writer.write(String.valueOf(tableModel.getValueAt(row, 1)));
        writer.write('\n');
      }
    }
    return null;
  }
}
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Emptiness