Rectangle 27 4

I've had success using Apache POI to read and write PowerPoint presentations on GAE. The important thing is to avoid calls in POI that would invoke the security-restricted java.awt classes. For reading content from a document, java.awt classes are avoided, so you should be fine. Writing content to a document is where you have to be careful. I use a predefined template and adjust the text and fonts directly. This avoids java.awt calls. If you try to create a new PPT document using an existing document as a template (as shown in the POI examples), this will fail due to java.awt calls and GAE prohibiting them. Your mileage may vary using Word docs, as I imagine there are fewer graphical library calls.

You'll probably struggle with new Document formats like Word 2010 and you'll obviously have to use URLfetch / Google Cloud Storage / Blobstore for working with the files. GAE doesn't support native file access.

Does google app engine support apache poi? - Stack Overflow

google-app-engine
Rectangle 27 1

I cannot even get useful results using XWPFDocument.createTOC() (the fields for page numbers are wrong, for example), so I have never used it. What I can say is that XWPFDocument.createTOC() currently does not create a "Table of Contents" the way Word itself does, so after opening the document in Word it will not be recognized as a TOC by the Word GUI.

But if the question is only about the styles, then that can be answered. There must be styles named "toc 1", "toc 2", "toc 3", ..., "toc n" present for each heading level. This can be achieved using XML like:

<w:style w:styleId="TOC1" w:type="paragraph">
 <w:name w:val="toc 1"/>
 <w:basedOn w:val="Normal"/>
 <w:next w:val="Normal"/>
 <w:autoRedefine/><w:unhideWhenUsed/>
 <w:rPr>
  <w:b/><w:bCs/><w:caps/><w:sz w:val="32"/><w:szCs w:val="32"/>
 </w:rPr>
</w:style>

Most of the formatting is done within the rPr (Run Properties) element and its children.

import java.io.File;
import java.io.FileOutputStream;

import org.apache.poi.xwpf.usermodel.*;

import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTNumbering;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTAbstractNum;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTStyles;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTStyle;

import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSimpleField;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.STOnOff;

import java.math.BigInteger;

/**
 * Creates a sample Word document ("CreateWordNumberedHeadings.docx") containing
 * numbered headings on three levels, followed by a table-of-contents field.
 *
 * <p>The numbering definition and all paragraph styles (Normal, heading 1-3 and
 * toc 1-2) are supplied as raw WordprocessingML fragments and parsed into the
 * document. The table of contents is inserted as a simple field marked dirty
 * rather than via {@code XWPFDocument.createTOC()}.
 */
public class CreateWordNumberedHeadings {

 /** Abstract numbering definition: decimal numbering on three levels ("%1", "%1.%2", "%1.%2.%3"). */
 static String cTAbstractNumDecimalXML = 
  "<w:abstractNum xmlns:w=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\" w:abstractNumId=\"0\">"
+ "<w:multiLevelType w:val=\"hybridMultilevel\"/>"
+ "<w:lvl w:ilvl=\"0\"><w:start w:val=\"1\"/><w:numFmt w:val=\"decimal\"/><w:lvlText w:val=\"%1\"/><w:lvlJc w:val=\"left\"/><w:pPr><w:ind w:left=\"360\" w:hanging=\"360\"/></w:pPr></w:lvl>"
+ "<w:lvl w:ilvl=\"1\" w:tentative=\"1\"><w:start w:val=\"1\"/><w:numFmt w:val=\"decimal\"/><w:lvlText w:val=\"%1.%2\"/><w:lvlJc w:val=\"left\"/><w:pPr><w:ind w:left=\"720\" w:hanging=\"360\"/></w:pPr></w:lvl>"
+ "<w:lvl w:ilvl=\"2\" w:tentative=\"1\"><w:start w:val=\"1\"/><w:numFmt w:val=\"decimal\"/><w:lvlText w:val=\"%1.%2.%3\"/><w:lvlJc w:val=\"left\"/><w:pPr><w:ind w:left=\"1440\" w:hanging=\"360\"/></w:pPr></w:lvl>"
+ "</w:abstractNum>";

 /** Default paragraph style "Normal"; the heading and toc styles below are based on it. */
 static String cTStyleNormal =
  "<w:style xmlns:w=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\" w:type=\"paragraph\" w:default=\"1\" w:styleId=\"Normal\">" 
+ "<w:name w:val=\"Normal\"/>"
+ "<w:qFormat/>"
+ "</w:style>";

 /** Paragraph style "heading 1" (styleId Heading1): bold, sz 36. */
 static String cTStyleH1XML =
  "<w:style xmlns:w=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\" w:type=\"paragraph\" w:styleId=\"Heading1\">"
+ "<w:name w:val=\"heading 1\"/>"
+ "<w:basedOn w:val=\"Normal\"/>"
+ "<w:next w:val=\"Normal\"/>"
+ "<w:unhideWhenUsed/>"
+ "<w:qFormat/>"
+ "<w:rPr><w:b/><w:sz w:val=\"36\"/></w:rPr>"
+ "</w:style>";

 /** Paragraph style "heading 2" (styleId Heading2): sz 32. */
 static String cTStyleH2XML =
  "<w:style xmlns:w=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\" w:type=\"paragraph\" w:styleId=\"Heading2\">"
+ "<w:name w:val=\"heading 2\"/>"
+ "<w:basedOn w:val=\"Normal\"/>"
+ "<w:next w:val=\"Normal\"/>"
+ "<w:unhideWhenUsed/>"
+ "<w:qFormat/>"
+ "<w:rPr><w:sz w:val=\"32\"/></w:rPr>"
+ "</w:style>";

 /** Paragraph style "heading 3" (styleId Heading3): italic, sz 28. */
 static String cTStyleH3XML =
  "<w:style xmlns:w=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\" w:type=\"paragraph\" w:styleId=\"Heading3\">"
+ "<w:name w:val=\"heading 3\"/>"
+ "<w:basedOn w:val=\"Normal\"/>"
+ "<w:next w:val=\"Normal\"/>"
+ "<w:unhideWhenUsed/>"
+ "<w:qFormat/>"
+ "<w:rPr><w:i/><w:sz w:val=\"28\"/></w:rPr>"
+ "</w:style>";

 /** Paragraph style "toc 1" (styleId TOC1): bold, caps, sz 32. */
 static String cTStyleTOC1 =
  "<w:style xmlns:w=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\" w:type=\"paragraph\" w:styleId=\"TOC1\">"
+ "<w:name w:val=\"toc 1\"/>"
+ "<w:basedOn w:val=\"Normal\"/>"
+ "<w:next w:val=\"Normal\"/>"
+ "<w:autoRedefine/><w:unhideWhenUsed/>"
+ "<w:rPr><w:b/><w:bCs/><w:caps/><w:sz w:val=\"32\"/><w:szCs w:val=\"32\"/></w:rPr>"
+ "</w:style>";

 /** Paragraph style "toc 2" (styleId TOC2): italic, sz 28. */
 static String cTStyleTOC2 =
  "<w:style xmlns:w=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\" w:type=\"paragraph\" w:styleId=\"TOC2\">"
+ "<w:name w:val=\"toc 2\"/>"
+ "<w:basedOn w:val=\"Normal\"/>"
+ "<w:next w:val=\"Normal\"/>"
+ "<w:autoRedefine/><w:unhideWhenUsed/>"
+ "<w:rPr><w:i/><w:iCs/><w:sz w:val=\"28\"/><w:szCs w:val=\"28\"/></w:rPr>"
+ "</w:style>";

 /**
  * Builds the document and writes it to "CreateWordNumberedHeadings.docx" in the
  * current working directory.
  *
  * @param args unused
  * @throws Exception if an XML fragment cannot be parsed or the file cannot be written
  */
 public static void main(String[] args) throws Exception {

  XWPFDocument document = new XWPFDocument();

  // Register the abstract numbering definition and create a concrete numbering from it.
  XWPFNumbering numbering = document.createNumbering();
  CTNumbering cTNumbering = CTNumbering.Factory.parse(cTAbstractNumDecimalXML);
  CTAbstractNum cTAbstractNum = cTNumbering.getAbstractNumArray(0);
  XWPFAbstractNum abstractNum = new XWPFAbstractNum(cTAbstractNum);

  BigInteger abstractNumID = numbering.addAbstractNum(abstractNum);
  BigInteger numID = numbering.addNum(abstractNumID);

  // Add the styles in the same order as before: Normal first, then headings, then toc styles.
  XWPFStyles styles = document.createStyles();
  String[] styleDefinitions = {
   cTStyleNormal, cTStyleH1XML, cTStyleH2XML, cTStyleH3XML, cTStyleTOC1, cTStyleTOC2
  };
  for (String styleXML : styleDefinitions) {
   addStyle(styles, styleXML);
  }

  createParagraphs(document, numID, "First Level@@Second Level@@First Level@@Second Level@@Third Level@@Second Level@@Third Level@@Second Level@@First Level");

  XWPFParagraph paragraph = document.createParagraph();
  XWPFRun run = paragraph.createRun();
  run.setText("Table of contents:");

  paragraph = document.createParagraph();

  // XWPFDocument.createTOC() is deliberately not used. A TOC field is inserted
  // instead and flagged dirty so that Word refreshes it when the document is opened.
  CTSimpleField toc = paragraph.getCTP().addNewFldSimple();
  toc.setInstr("TOC \\* MERGEFORMAT");
  toc.setDirty(STOnOff.TRUE);

  // try-with-resources: the stream is flushed and closed even if write() throws.
  try (FileOutputStream out = new FileOutputStream("CreateWordNumberedHeadings.docx")) {
   document.write(out);
  }

 }

 /**
  * Parses one {@code <w:style>} XML fragment and adds the first style it yields.
  *
  * @param styles   the styles collection to add to
  * @param styleXML a WordprocessingML style fragment
  * @throws Exception if the fragment cannot be parsed
  */
 private static void addStyle(XWPFStyles styles, String styleXML) throws Exception {
  CTStyles cTStyles = CTStyles.Factory.parse(styleXML);
  CTStyle cTStyle = cTStyles.getStyleArray(0);
  styles.addStyle(new XWPFStyle(cTStyle));
 }

 /**
  * Appends one numbered heading paragraph plus one body paragraph per entry of
  * {@code content}.
  *
  * <p>Entries are separated by "@@". A heading uses style "Heading1" by default;
  * an entry containing "Second" gets numbering level 1 and style "Heading2", an
  * entry containing "Third" gets numbering level 2 and style "Heading3". Each
  * heading is followed by a placeholder text paragraph ending in a page break.
  *
  * @param doc     the document to append to
  * @param numID   the numbering ID applied to every heading paragraph
  * @param content heading texts joined by "@@"
  */
 public static void createParagraphs(XWPFDocument doc, BigInteger numID, String content) {
  for (String value : content.split("@@")) {
   XWPFParagraph para = doc.createParagraph();
   para.setVerticalAlignment(TextAlignment.CENTER);
   para.setNumID(numID);
   para.setStyle("Heading1");
   if (value.contains("Second")) {
    para.getCTP().getPPr().getNumPr().addNewIlvl().setVal(BigInteger.valueOf(1));
    para.setStyle("Heading2");
   }
   if (value.contains("Third")) {
    para.getCTP().getPPr().getNumPr().addNewIlvl().setVal(BigInteger.valueOf(2));
    para.setStyle("Heading3");
   }
   XWPFRun run = para.createRun();
   run.setText(value);

   // Body paragraph after the heading; the page break puts the next heading on a new page.
   para = doc.createParagraph();
   run = para.createRun();
   run.setText("Lorem ipsum semit dolor ...");
   run.addBreak(BreakType.PAGE);
  }
 }
}

Note: I do not use XWPFDocument.createTOC() for the reasons mentioned above. Instead I insert a field which must be refreshed when the document is opened in Word. This leads to a confirmation dialog on opening, which must be answered with Yes.

// NOTE(review): appears to be a variant ordering of the steps above (createTOC() and the TOC field first, headings afterwards) — confirm the intent.
document.createTOC();  XWPFParagraph paragraph = document.createParagraph();  CTSimpleField toc = paragraph.getCTP().addNewFldSimple();  toc.setInstr("TOC \\* MERGEFORMAT");  toc.setDirty(STOnOff.TRUE);  createParagraphs(document, numID, "...content...");

apache poi - How to set table of content's font style including font c...

apache-poi
Rectangle 27 1

You can set the default font of the document like this (using your variable names):

// Obtain the styles object of the document via createStyles().
XWPFStyles styles = document.createStyles();

// Build a CTFonts bean and set its East Asian and HAnsi font names.
CTFonts fonts = CTFonts.Factory.newInstance();
fonts.setEastAsia(eastAsiaFontName);
fonts.setHAnsi(normalFontName);

// Hand the fonts to the styles object as the document's default fonts.
styles.setDefaultFonts(fonts);

I am currently stuck at how I set the default font size. That's why I found this question... The XWPFStyles class has a member "CTStyles ctStyles" with which it would be possible to define any property defined in the XML spec. But unfortunately it has only a public setter, but no getter.

How to set the global font of word file via Apache poi? - Stack Overfl...

apache-poi
Rectangle 27 27

I think Apache POI can do the job. A possible problem, depending on the usage you're aiming for, is that HWPF is still in early development.

HWPF is the set of APIs for reading and writing Microsoft Word 97(-XP) documents using (only) Java.

Any knowledge of graph and table possibilities? How about tables of contents? Anyone have real experience doing those things in POI?

Looking at the documentation for POI, it seems this HWPF is very early in development, mainly allowing for reading text out of a .doc, not really for dynamic creation of "complex" documents.

I think you'd be better off using docx4j, but I would, since I work on that project. docx4j is focused on docx documents, and uses JAXB, not XML Beans.

What's a good Java API for creating Word documents? - Stack Overflow

java ms-word docx doc
Rectangle 27 1

<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.15</version>
</dependency>

The classes we used in the code snippet, HSSFWorkbook and HSSFSheet, work with the .xls format. To work with .xlsx, use the XSSFWorkbook and XSSFSheet classes.

HSSF (Horrible SpreadSheet Format) reads and writes Microsoft Excel (XLS) format files.

XSSF (XML SpreadSheet Format) reads and writes Office Open XML (XLSX) format files.

HWPF (Horrible Word Processor Format) aims to read and write Microsoft Word 97 (DOC) format files.

HSLF (Horrible Slide Layout Format) a pure Java implementation for Microsoft PowerPoint files.

HPBF (Horrible PuBlisher Format) a pure Java implementation for Microsoft Publisher files.

HSMF (Horrible Stupid Mail Format) a pure Java implementation for Microsoft Outlook MSG files.

DDF (Dreadful Drawing Format) a package for decoding the Microsoft Office Drawing format.

import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
//..
// Create a new in-memory .xls workbook with a single sheet.
HSSFWorkbook workbook = new HSSFWorkbook();
HSSFSheet sheet = workbook.createSheet("FuSsA sheet");
// Create a new row in the current sheet
Row row = sheet.createRow(0);
// Create a new cell in the current row
Cell cell = row.createCell(0);
// Set the cell's value
cell.setCellValue("Slim Shady");
try {
    // try-with-resources closes the stream even if write() throws.
    try (FileOutputStream out = new FileOutputStream(new File("C:\\new.xls"))) {
        workbook.write(out);
    }
    // Only reached once the file has been written and closed.
    System.out.println("Excel written successfully..");
} catch (FileNotFoundException e) {
    e.printStackTrace();
} catch (IOException e) {
    e.printStackTrace();
}
try {
    HSSFWorkbook workbook;
    // Load the existing workbook; the input stream is closed even if loading fails,
    // and always before the same path is reopened for writing below.
    try (FileInputStream file = new FileInputStream(new File("C:\\update.xls"))) {
        workbook = new HSSFWorkbook(file);
    }
    HSSFSheet sheet = workbook.getSheetAt(0);

    // Double the numeric value of the cell at column index 2 in rows 1, 2 and 3.
    for (int rowIndex = 1; rowIndex <= 3; rowIndex++) {
        Cell cell = sheet.getRow(rowIndex).getCell(2);
        cell.setCellValue(cell.getNumericCellValue() * 2);
    }

    // Write the modified workbook back; the stream is closed even if write() throws.
    try (FileOutputStream outFile = new FileOutputStream(new File("C:\\update.xls"))) {
        workbook.write(outFile);
    }

} catch (FileNotFoundException e) {
    e.printStackTrace();
} catch (IOException e) {
    e.printStackTrace();
}

For more details, Adding Formulas and Adding Styles to Cell you can check this link: Read / Write Excel file in Java using Apache POI

java - Read / Write different Microsoft Office file formats using Apac...

java maven apache-poi xlsx
Rectangle 27 2

Apache POI is one of the more useful libraries for doing MS Word on java.

Our requirement is we can't use third party tools or jar files which we have to buy. They suggested me to complete the requirement by using open source jar files and by using java. No office org tools, etc; So i specifically asked the source code. If not you help me to reach my requirement by providing required information. Do that favour.It will be helpful for me.

Apache POI is Open-Source. If you want to do it yourself, go read the Microsoft documentation on how to parse a spreadsheet(Beware, it's not as much fun as you might think).

How to print excel file and word document in java? - Stack Overflow

java
Rectangle 27 1174

The app server and the world

The word "deployment" can have two meanings depending on the context. You are also confusing the roles of Apache/Nginx with the roles of other components.

Historic note: This article was originally written on November 6, 2010, when the Ruby app server ecosystem was limited. I've updated this article on March 15 2013 with all the latest updates in the ecosystem.

Disclaimer: I am one of the authors of Phusion Passenger, one of the app servers.

They're both web servers. They can serve static files but - with the right modules - can also serve dynamic web apps e.g. those written in PHP. Apache is more popular and has more features; Nginx is smaller and faster and has fewer features.

Neither Apache nor Nginx can serve Ruby web apps out-of-the-box, to do that you need to use Apache/Nginx in combination with some kind of add-on, described later.

Apache and Nginx can also act as reverse proxies, meaning that they can take an incoming HTTP request and forward it to another server, which also speaks HTTP. When that server responds with an HTTP response, Apache/Nginx will forward the response back to the client; You will learn later why this is relevant.

Mongrel is a Ruby "application server": In concrete terms this means that Mongrel is an application which:

  • Sets up a TCP socket, allowing it to communicate with the outside world (e.g. the Internet). Mongrel listens for HTTP requests on this socket and passes the request data to the Ruby web app.
  • The Ruby web app then returns an object, which describes what the HTTP response should look like, and Mongrel takes care of converting it to an actual HTTP response (the actual bytes) and sends it back over the socket.

However, Mongrel is quite dated and is no longer maintained. Newer alternative application servers include Phusion Passenger, Unicorn, Thin, Puma, Rainbows, Trinidad and TorqueBox.

I'll cover them later and describe how they differ from each other and from Mongrel.

WEBrick does the same thing as Mongrel, but the differences are:

  • WEBrick is not fit for production, unlike everything else that I mentioned before. WEBrick is written entirely in Ruby. Mongrel (and most other Ruby app servers) is part Ruby and part C (Mostly Ruby), but its HTTP parser is written in C for performance.
  • WEBrick is usually only used as the default server during development because WEBrick is included in Ruby by default. Mongrel and other app servers needs to be installed separately. It's not recommended to use WEBrick in production environments, though for some reason Heroku chose WEBrick as its default server. They were using Thin before, so I have no idea why they switched to WEBrick.

All current Ruby app servers speak HTTP, however some app servers may be directly exposed to the Internet on port 80, while others may not.

  • App servers that can be directly exposed to the Internet: Phusion Passenger, Rainbows
  • App servers that may not be directly exposed to the Internet: Mongrel, Unicorn, Thin, Puma. These app servers must be put behind a reverse proxy web server like Apache and Nginx.
  • I don't know enough about Trinidad and TorqueBox, so I've omitted them.
  • Some app servers can only handle 1 request concurrently, per process. If you want to handle 2 requests concurrently you need to run multiple app server instances, each serving the same Ruby app. This set of app server processes is called an app server cluster (hence the name Mongrel Cluster, Thin Cluster, etc). You must then setup Apache or Nginx to reverse proxy to this cluster. Apache/Nginx will take care of distributing requests between the instances in the cluster (More on this in section "I/O concurrency models").
  • The web server can buffer requests and responses, protecting the app server from "slow clients" - HTTP clients that don't send or accept data very quickly. You don't want your app server to do nothing while waiting for the client to send the full request or to receive the full response, because during that time the app server may not be able to do anything else. Apache and Nginx are very good at doing many things at the same time because they're either multithreaded or evented.
  • Most app servers can serve static files, but are not particularly good at it. Apache and Nginx can do it faster.
  • People typically set up Apache/Nginx to serve static files directly, but forward requests that don't correspond with static files to the app server, it's good security practice. Apache and Nginx are very mature and can shield the app server from (perhaps maliciously) corrupted requests.

Why can some app servers be directly exposed to the Internet?

  • Phusion Passenger is a very different beast from all the other app servers. One of its unique features is that it integrates into the web server.
  • The Rainbows author publicly stated that it's safe to directly expose it to the Internet. The author is fairly sure that there are no vulnerabilities in the HTTP parser (and similar). Still, the author provides no warranty and says that usage is at own risk.

In this section I'll compare most application servers I've mentioned, but not Phusion Passenger. Phusion Passenger is such a different beast from the rest that I've given it a dedicated section. I've also omitted Trinidad and TorqueBox because I do not know them well enough, but they're only relevant anyway if you use JRuby.

  • Mongrel was pretty bare bones. As mentioned earlier, Mongrel is purely single-threaded multi-process, so it is only useful in a cluster. There is no process monitoring: if a process in the cluster crashes (e.g. because of a bug in the app) then it needs to be manually restarted. People tend to use external process monitoring tools such as Monit and God.
  • Unicorn is a fork of Mongrel. It supports limited process monitoring: if a process crashes it is automatically restarted by the master process. It can make all processes listen on a single shared socket, instead of a separate socket for each process. This simplifies reverse proxy configuration. Like Mongrel, it is purely single-threaded multi-process.
  • Thin uses the evented I/O model by utilizing the EventMachine library. Other than using the Mongrel HTTP parser, it is not based on Mongrel in any way. Its cluster mode has no process monitoring so you need to monitor crashes etc. There is no Unicorn-like shared socket, so each process listens on its own socket. In theory, Thin's I/O model allows high concurrency, but in most practical situations that Thin is used for, one Thin process can only handle 1 concurrent request, so you still need a cluster. More about this peculiar property in section "I/O concurrency models".
  • Puma was also forked from Mongrel, but unlike Unicorn, Puma is designed to be purely multi-threaded. There is therefore currently no builtin cluster support. You need to take special care to ensure that you can utilize multiple cores (More about this in section "I/O concurrency models").
  • Rainbows supports multiple concurrency models through the use of different libraries.

Phusion Passenger works very differently from all the other ones. Phusion Passenger integrates directly into Apache or Nginx, and so can be compared to mod_php for Apache. Just like mod_php allows Apache to serve PHP apps, almost magically, Phusion Passenger allows Apache (and also Nginx!) to serve Ruby apps, almost magically. Phusion Passenger's goal is to make everything Just Work(tm) with as little hassle as possible.

Instead of starting a process or cluster for your app, and configuring Apache/Nginx to serve static files and/or reverse proxy requests to the process/cluster, with Phusion Passenger you only need to:

  • You edit the web server config file and specify the location of your Ruby app's 'public' directory.

All configuration is done within the web server config file. Phusion Passenger automates pretty much everything. There is no need to start a cluster and manage processes. Starting/stopping processes, restarting them when they crash, etc. - all automated. Compared to other app servers, Phusion Passenger has far fewer moving parts. This ease of use is one of the primary reasons why people use Phusion Passenger.

Also unlike other app servers, Phusion Passenger is primarily written in C++, making it very fast.

There's also an Enterprise variant of Phusion Passenger with even more features, such as automated rolling restarts, multithreading support, deployment error resistance, etc.

For the above reasons, Phusion Passenger is currently the most popular Ruby app server, powering over 150,000 websites, including large ones such as New York Times, Pixar, Airbnb, etc.

Phusion Passenger provides a lot more features and provides many advantages over other app servers, such as:

  • Dynamically adjusting the number of processes based on traffic. We run a ton of Rails apps on our resource-constrained server that are not public-facing, and that people in our organization only use at most a few times a day. Things like Gitlab, Redmine, etc. Phusion Passenger can spin down those processes when they're not used, and spin them up when they're used, allowing more resources to be available for more important apps. With other app servers, all your processes are turned on all the time.

Workloads that Unicorn is not good at are:

  • Workloads in which the app performs HTTP API calls.

The hybrid I/O model in Phusion Passenger Enterprise 4 or later makes it an excellent choice for these kinds of workloads.

  • Other app servers require the user to run at least one instance per application. By contrast, Phusion Passenger supports multiple applications in a single instance. This greatly reduces administration overhead.
  • Automatic user switching, a convenient security feature.
  • Phusion Passenger supports MRI Ruby, JRuby and Rubinius. Mongrel, Unicorn and Thin only support MRI. Puma also supports all 3.
  • Phusion Passenger actually supports more than just Ruby! It also supports Python WSGI, so it can for example also run Django and Flask apps. In fact Phusion Passenger is moving in the direction of becoming a polyglot server. Node.js support is on the todo list.
  • Out-of-band garbage collection. Phusion Passenger can run the Ruby garbage collector outside the normal request/response cycle, potentially reducing request times by hundreds of milliseconds. Unicorn also has a similar feature, but Phusion Passenger's version is more flexible because 1) it's not limited to GC and can be used for arbitrary work. 2) Phusion Passenger's version works well with multithreaded apps, while Unicorn's does not.
  • Automated rolling restarts. Rolling restarts on Unicorn and other servers require some scripting work. Phusion Passenger Enterprise completely automates this for you.

There are more features and advantages, but the list is really long. You should refer to the comprehensive Phusion Passenger manual (Apache version, Nginx version) or the Phusion Passenger website for information.

  • Single-threaded multi-process. This is traditionally the most popular I/O model for Ruby app servers, partially because multithreading support in the Ruby ecosystem was very bad. Each process can handle exactly 1 request at a time. The web server load balances between processes. This model is very robust and there is little chance for the programmer to introduce concurrency bugs. However, its I/O concurrency is extremely limited (limited by the number of processes). This model is very suitable for fast, short-running workloads. It is very unsuitable for slow, long-running blocking I/O workloads, e.g. workloads involving the calling of HTTP APIs.
  • Purely multi-threaded. Nowadays the Ruby ecosystem has excellent multithreading support, so this I/O model has become very viable. Multithreading allows high I/O concurrency, making it suitable for both short-running and long-running blocking I/O workloads. The programmer is more likely to introduce concurrency bugs, but luckily most web frameworks are designed in such a way that this is still very unlikely. One thing to note however is that the MRI Ruby interpreter cannot leverage multiple CPU cores even when there are multiple threads, due to the use of the Global Interpreter Lock (GIL). You can work around this by using multiple multi-threaded processes, because each process can leverage a CPU core. JRuby and Rubinius have no GIL, so they can fully leverage multiple cores in a single process.
  • Hybrid multi-threaded multi-process. Primarily implemented by Phusion Passenger Enterprise 4 and later. You can easily switch between single-threaded multi-process, purely multithreaded, or perhaps even multiple processes each with multiple threads. This model gives the best of both worlds.
  • Evented. This model is completely different from the previously mentioned model. It allows very high I/O concurrency and is therefore excellent for long-running blocking I/O workloads. To utilize it, explicit support from the application and the framework is required. However all the major frameworks like Rails and Sinatra do not support evented code. This is why in practice a Thin process still cannot handle more than 1 request at a time, making it effectively behave the same as the single-threaded multi-process model. There are specialized frameworks that can take advantage of evented I/O, such as Cramp.

An article was recently posted on the Phusion blog about optimally tuning the number of processes and threads given your workload. See Tuning Phusion Passenger's concurrency settings.

Capistrano is something completely different. In all the previous sections, "deployment" refers to the act of starting your Ruby app in an application server, so that it becomes accessible to visitors, but before that can happen one typically needs to do some preparation work, such as:

  • Uploading the Ruby app's code and files to the server machine.
  • Setting up or migrating the database.
  • Starting and stopping any daemons that your app might rely on, such as Sidekiq/Resque workers or whatever.

In the context of Capistrano, "deployment" refers to doing all this preparation work. Capistrano is not an application server. Instead, it is a tool for automating all that preparation work. You tell Capistrano where your server is and which commands need to be run every time you deploy a new version of your app, and Capistrano will take care of uploading the Rails app to the server for you and running the commands you specified.

Capistrano is always used in combination with an application server. It does not replace application servers. Vice-versa, application servers do not replace Capistrano, they can be used in combination with Capistrano.

Of course you don't have to use Capistrano. If you prefer to upload your Ruby app with FTP and manually run the same sequence of commands every time, then you can do that. Other people got tired of it, so they automate those steps in Capistrano.

You should publish this somewhere. It's all easy now but when I first started with rails it was hard to get any useful info.

Excellent post! Cleared up a lot for me too. You should add some other elements like bundler and rvm and make it a heavy-hitting blog post! :)

This needs to be in the Rails guides.

"Nobody uses WEBrick in production environments." This is not true at all. The default app server when pushing ruby apps to heroku is webrick.

apache - Ruby on Rails Server options - Stack Overflow

ruby-on-rails apache passenger mongrel
Rectangle 27 1174

The app server and the world

The word "deployment" can have two meanings depending on the context. You are also confusing the roles of Apache/Nginx with the roles of other components.

Historic note: This article was originally written on November 6, 2010, when the Ruby app server ecosystem was limited. I've updated this article on March 15 2013 with all the latest updates in the ecosystem.

Disclaimer: I am one of the authors of Phusion Passenger, one of the app servers.

They're both web servers. They can serve static files but - with the right modules - can also serve dynamic web apps e.g. those written in PHP. Apache is more popular and has more features; Nginx is smaller and faster and has fewer features.

Neither Apache nor Nginx can serve Ruby web apps out-of-the-box, to do that you need to use Apache/Nginx in combination with some kind of add-on, described later.

Apache and Nginx can also act as reverse proxies, meaning that they can take an incoming HTTP request and forward it to another server, which also speaks HTTP. When that server responds with an HTTP response, Apache/Nginx will forward the response back to the client; You will learn later why this is relevant.

Mongrel is a Ruby "application server": In concrete terms this means that Mongrel is an application which:

  • Sets up a TCP socket, allowing it to communicate with the outside world (e.g. the Internet). Mongrel listens for HTTP requests on this socket and passes the request data to the Ruby web app.
  • The Ruby web app then returns an object, which describes what the HTTP response should look like, and Mongrel takes care of converting it to an actual HTTP response (the actual bytes) and sends it back over the socket.

However Mongrel is quite dated, nowadays it is no longer maintained. Newer alternative application servers are:

I'll cover them later and describe how they differ from each other and from Mongrel.

WEBrick does the same thing as Mongrel, but the differences are:

  • WEBrick is not fit for production, unlike everything else that I mentioned before. WEBrick is written entirely in Ruby. Mongrel (and most other Ruby app servers) is part Ruby and part C (Mostly Ruby), but its HTTP parser is written in C for performance.
  • WEBrick is usually only used as the default server during development because WEBrick is included in Ruby by default. Mongrel and other app servers needs to be installed separately. It's not recommended to use WEBrick in production environments, though for some reason Heroku chose WEBrick as its default server. They were using Thin before, so I have no idea why they switched to WEBrick.

All current Ruby app servers speak HTTP, however some app servers may be directly exposed to the Internet on port 80, while others may not.

  • App servers that can be directly exposed to the Internet: Phusion Passenger, Rainbows
  • App servers that may not be directly exposed to the Internet: Mongrel, Unicorn, Thin, Puma. These app servers must be put behind a reverse proxy web server like Apache and Nginx.
  • I don't know enough about Trinidad and TorqueBox, so I've omitted them.
  • Some app servers can only handle 1 request concurrently, per process. If you want to handle 2 requests concurrently you need to run multiple app server instances, each serving the same Ruby app. This set of app server processes is called an app server cluster (hence the name Mongrel Cluster, Thin Cluster, etc). You must then setup Apache or Nginx to reverse proxy to this cluster. Apache/Nginx will take care of distributing requests between the instances in the cluster (More on this in section "I/O concurrency models").
  • The web server can buffer requests and responses, protecting the app server from "slow clients" - HTTP clients that don't send or accept data very quickly. You don't want your app server to do nothing while waiting for the client to send the full request or to receive the full response, because during that time the app server may not be able to do anything else. Apache and Nginx are very good at doing many things at the same time because they're either multithreaded or evented.
  • Most app servers can serve static files, but are not particularly good at it. Apache and Nginx can do it faster.
  • People typically set up Apache/Nginx to serve static files directly, but forward requests that don't correspond with static files to the app server, it's good security practice. Apache and Nginx are very mature and can shield the app server from (perhaps maliciously) corrupted requests.

Why can some app servers be directly exposed to the Internet?

  • Phusion Passenger is a very different beast from all the other app servers. One of its unique features is that it integrates into the web server.
  • The Rainbows author publicly stated that it's safe to directly expose it to the Internet. The author is fairly sure that there are no vulnerabilities in the HTTP parser (and similar). Still, the author provides no warranty and says that usage is at own risk.

In this section I'll compare most application servers I've mentioned, but not Phusion Passenger. Phusion Passenger is such a different beast from the rest that I've given it a dedicated section. I've also omitted Trinidad and TorqueBox because I do not know them well enough, but they're only relevant anyway if you use JRuby.

  • Mongrel was pretty bare bones. As mentioned earlier, Mongrel is purely single-threaded multi-process, so it is only useful in a cluster. There is no process monitoring: if a process in the cluster crashes (e.g. because of a bug in the app) then it needs to be manually restarted. People tend to use external process monitoring tools such as Monit and God.
  • Unicorn is a fork of Mongrel. It supports limited process monitoring: if a process crashes it is automatically restarted by the master process. It can make all processes listen on a single shared socket, instead of a separate socket for each process. This simplifies reverse proxy configuration. Like Mongrel, it is purely single-threaded multi-process.
  • Thin uses the evented I/O model by utilizing the EventMachine library. Other than using the Mongrel HTTP parser, it is not based on Mongrel in any way. Its cluster mode has no process monitoring so you need to monitor crashes etc. There is no Unicorn-like shared socket, so each process listens on its own socket. In theory, Thin's I/O model allows high concurrency, but in most practical situations that Thin is used for, one Thin process can only handle 1 concurrent request, so you still need a cluster. More about this peculiar property in section "I/O concurrency models".
  • Puma was also forked from Mongrel, but unlike Unicorn, Puma is designed to be purely multi-threaded. There is therefore currently no builtin cluster support. You need to take special care to ensure that you can utilize multiple cores (More about this in section "I/O concurrency models").
  • Rainbows supports multiple concurrency models through the use of different libraries.

Phusion Passenger works very differently from all the other ones. Phusion Passenger integrates directly into Apache or Nginx, and so can be compared to mod_php for Apache. Just like mod_php allows Apache to serve PHP apps, almost magically, Phusion Passenger allows Apache (and also Nginx!) to serve Ruby apps, almost magically. Phusion Passenger's goal is to make everything Just Work(tm) with as little hassle as possible.

Instead of starting a process or cluster for your app, and configuring Apache/Nginx to serve static files and/or reverse proxying requests to the process/cluster, with Phusion Passenger you only need to:

  • You edit the web server config file and specify the location of your Ruby app's 'public' directory.

All configuration is done within the web server config file. Phusion Passenger automates pretty much everything. There is no need to start a cluster and manage processes. Starting/stopping processes, restarting them when they crash, etc. - all automated. Compared to other app servers, Phusion Passenger has far fewer moving parts. This ease of use is one of the primary reasons why people use Phusion Passenger.

Also unlike other app servers, Phusion Passenger is primarily written in C++, making it very fast.

There's also an Enterprise variant of Phusion Passenger with even more features, such as automated rolling restarts, multithreading support, deployment error resistance, etc.

For the above reasons, Phusion Passenger is currently the most popular Ruby app server, powering over 150,000 websites, including large ones such as New York Times, Pixar, Airbnb, etc.

Phusion Passenger provides a lot more features and provides many advantages over other app servers, such as:

  • Dynamically adjusting the number of processes based on traffic. We run a ton of Rails apps on our resource-constrained server that are not public-facing, and that people in our organization only use at most a few times a day. Things like Gitlab, Redmine, etc. Phusion Passenger can spin down those processes when they're not used, and spin them up when they're used, allowing more resources to be available for more important apps. With other app servers, all your processes are turned on all the time.

Workloads that Unicorn is not good at are:

  • Workloads in which the app performs HTTP API calls.

The hybrid I/O model in Phusion Passenger Enterprise 4 or later makes it an excellent choice for these kinds of workloads.

  • Other app servers require the user to run at least one instance per application. By contrast, Phusion Passenger supports multiple applications in a single instance. This greatly reduces administration overhead.
  • Automatic user switching, a convenient security feature.
  • Phusion Passenger supports MRI Ruby, JRuby and Rubinius. Mongrel, Unicorn and Thin only support MRI. Puma also supports all 3.
  • Phusion Passenger actually supports more than just Ruby! It also supports Python WSGI, so it can for example also run Django and Flask apps. In fact Phusion Passenger is moving into the direction of becoming a polyglot server. Node.js support is on the todo list.
  • Out-of-band garbage collection. Phusion Passenger can run the Ruby garbage collector outside the normal request/response cycle, potentially reducing request times by hundreds of milliseconds. Unicorn also has a similar feature, but Phusion Passenger's version is more flexible because 1) it's not limited to GC and can be used for arbitrary work. 2) Phusion Passenger's version works well with multithreaded apps, while Unicorn's does not.
  • Automated rolling restarts. Rolling restarts on Unicorn and other servers require some scripting work. Phusion Passenger Enterprise completely automates this for you.

There are more features and advantages, but the list is really long. You should refer to the comprehensive Phusion Passenger manual (Apache version, Nginx version) or the Phusion Passenger website for information.

  • Single-threaded multi-process. This is traditionally the most popular I/O model for Ruby app servers, partially because multithreading support in the Ruby ecosystem was very bad. Each process can handle exactly 1 request at a time. The web server load balances between processes. This model is very robust and there is little chance for the programmer to introduce concurrency bugs. However, its I/O concurrency is extremely limited (limited by the number of processes). This model is very suitable for fast, short-running workloads. It is very unsuitable for slow, long-running blocking I/O workloads, e.g. workloads involving the calling of HTTP APIs.
  • Purely multi-threaded. Nowadays the Ruby ecosystem has excellent multithreading support, so this I/O model has become very viable. Multithreading allows high I/O concurrency, making it suitable for both short-running and long-running blocking I/O workloads. The programmer is more likely to introduce concurrency bugs, but luckily most web frameworks are designed in such a way that this is still very unlikely. One thing to note however is that the MRI Ruby interpreter cannot leverage multiple CPU cores even when there are multiple threads, due to the use of the Global Interpreter Lock (GIL). You can work around this by using multiple multi-threaded processes, because each process can leverage a CPU core. JRuby and Rubinius have no GIL, so they can fully leverage multiple cores in a single process.
  • Hybrid multi-threaded multi-process. Primarily implemented by Phusion Passenger Enterprise 4 and later. You can easily switch between single-threaded multi-process, purely multithreaded, or perhaps even multiple processes each with multiple threads. This model gives the best of both worlds.
  • Evented. This model is completely different from the previously mentioned models. It allows very high I/O concurrency and is therefore excellent for long-running blocking I/O workloads. To utilize it, explicit support from the application and the framework is required. However all the major frameworks like Rails and Sinatra do not support evented code. This is why in practice a Thin process still cannot handle more than 1 request at a time, making it effectively behave the same as the single-threaded multi-process model. There are specialized frameworks that can take advantage of evented I/O, such as Cramp.

An article was recently posted on the Phusion blog about optimally tuning the number of processes and threads given your workload. See Tuning Phusion Passenger's concurrency settings.

Capistrano is something completely different. In all the previous sections, "deployment" refers to the act of starting your Ruby app in an application server, so that it becomes accessible to visitors, but before that can happen one typically needs to do some preparation work, such as:

  • Uploading the Ruby app's code and files to the server machine.
  • Setting up or migrating the database.
  • Starting and stopping any daemons that your app might rely on, such as Sidekiq/Resque workers or whatever.

In the context of Capistrano, "deployment" refers to doing all this preparation work. Capistrano is not an application server. Instead, it is a tool for automating all that preparation work. You tell Capistrano where your server is and which commands need to be run every time you deploy a new version of your app, and Capistrano will take care of uploading the Rails app to the server for you and running the commands you specified.

Capistrano is always used in combination with an application server. It does not replace application servers. Vice-versa, application servers do not replace Capistrano, they can be used in combination with Capistrano.

Of course you don't have to use Capistrano. If you prefer to upload your Ruby app with FTP and manually run the same steps of commands every time, then you can do that. Other people got tired of it, so they automate those steps in Capistrano.

You should publish this somewhere. It's all easy now but when I first started with rails it was hard to get any useful info.

Excellent post! Cleared up a lot for me too. You should add some other elements like bundler and rvm and make it a heavy-hitting blog post! :)

This needs to be in the Rails guides.

"Nobody uses WEBrick in production environments." This is not true at all. The default app server when pushing ruby apps to heroku is webrick.

apache - Ruby on Rails Server options - Stack Overflow

ruby-on-rails apache passenger mongrel
Rectangle 27 1166

The app server and the world

The word "deployment" can have two meanings depending on the context. You are also confusing the roles of Apache/Nginx with the roles of other components.

Historic note: This article was originally written on November 6, 2010, when the Ruby app server ecosystem was limited. I've updated this article on March 15 2013 with all the latest updates in the ecosystem.

Disclaimer: I am one of the authors of Phusion Passenger, one of the app servers.

They're both web servers. They can serve static files but - with the right modules - can also serve dynamic web apps e.g. those written in PHP. Apache is more popular and has more features, Nginx is smaller and faster and has fewer features.

Neither Apache nor Nginx can serve Ruby web apps out-of-the-box, to do that you need to use Apache/Nginx in combination with some kind of add-on, described later.

Apache and Nginx can also act as reverse proxies, meaning that they can take an incoming HTTP request and forward it to another server, which also speaks HTTP. When that server responds with an HTTP response, Apache/Nginx will forward the response back to the client; You will learn later why this is relevant.

Mongrel is a Ruby "application server": In concrete terms this means that Mongrel is an application which:

  • Sets up a TCP socket, allowing it to communicate with the outside world (e.g. the Internet). Mongrel listens for HTTP requests on this socket and passes the request data to the Ruby web app.
  • The Ruby web app then returns an object, which describes what the HTTP response should look like, and Mongrel takes care of converting it to an actual HTTP response (the actual bytes) and sends it back over the socket.

However Mongrel is quite dated, nowadays it is no longer maintained. Newer alternative application servers are:

I'll cover them later and describe how they differ from each other and from Mongrel.

WEBrick does the same thing as Mongrel, but the differences are:

  • WEBrick is not fit for production, unlike everything else that I mentioned before. WEBrick is written entirely in Ruby. Mongrel (and most other Ruby app servers) is part Ruby and part C (Mostly Ruby), but its HTTP parser is written in C for performance.
  • WEBrick is usually only used as the default server during development because WEBrick is included in Ruby by default. Mongrel and other app servers need to be installed separately. It's not recommended to use WEBrick in production environments, though for some reason Heroku chose WEBrick as its default server. They were using Thin before, so I have no idea why they switched to WEBrick.

All current Ruby app servers speak HTTP, however some app servers may be directly exposed to the Internet on port 80, while others may not.

  • App servers that can be directly exposed to the Internet: Phusion Passenger, Rainbows
  • App servers that may not be directly exposed to the Internet: Mongrel, Unicorn, Thin, Puma. These app servers must be put behind a reverse proxy web server like Apache and Nginx.
  • I don't know enough about Trinidad and TorqueBox, so I've omitted them.
  • Some app servers can only handle 1 request concurrently, per process. If you want to handle 2 requests concurrently you need to run multiple app server instances, each serving the same Ruby app. This set of app server processes is called an app server cluster (hence the name Mongrel Cluster, Thin Cluster, etc). You must then setup Apache or Nginx to reverse proxy to this cluster. Apache/Nginx will take care of distributing requests between the instances in the cluster (More on this in section "I/O concurrency models").
  • The web server can buffer requests and responses, protecting the app server from "slow clients" - HTTP clients that don't send or accept data very quickly. You don't want your app server to do nothing while waiting for the client to send the full request or to receive the full response, because during that time the app server may not be able to do anything else. Apache and Nginx are very good at doing many things at the same time because they're either multithreaded or evented.
  • Most app servers can serve static files, but are not particularly good at it. Apache and Nginx can do it faster.
  • People typically set up Apache/Nginx to serve static files directly, but forward requests that don't correspond with static files to the app server, it's good security practice. Apache and Nginx are very mature and can shield the app server from (perhaps maliciously) corrupted requests.

Why can some app servers be directly exposed to the Internet?

  • Phusion Passenger is a very different beast from all the other app servers. One of its unique features is that it integrates into the web server.
  • The Rainbows author publicly stated that it's safe to directly expose it to the Internet. The author is fairly sure that there are no vulnerabilities in the HTTP parser (and similar). Still, the author provides no warranty and says that usage is at own risk.

In this section I'll compare most application servers I've mentioned, but not Phusion Passenger. Phusion Passenger is such a different beast from the rest that I've given it a dedicated section. I've also omitted Trinidad and TorqueBox because I do not know them well enough, but they're only relevant anyway if you use JRuby.

  • Mongrel was pretty bare bones. As mentioned earlier, Mongrel is purely single-threaded multi-process, so it is only useful in a cluster. There is no process monitoring: if a process in the cluster crashes (e.g. because of a bug in the app) then it needs to be manually restarted. People tend to use external process monitoring tools such as Monit and God.
  • Unicorn is a fork of Mongrel. It supports limited process monitoring: if a process crashes it is automatically restarted by the master process. It can make all processes listen on a single shared socket, instead of a separate socket for each process. This simplifies reverse proxy configuration. Like Mongrel, it is purely single-threaded multi-process.
  • Thin uses the evented I/O model by utilizing the EventMachine library. Other than using the Mongrel HTTP parser, it is not based on Mongrel in any way. Its cluster mode has no process monitoring so you need to monitor crashes etc. There is no Unicorn-like shared socket, so each process listens on its own socket. In theory, Thin's I/O model allows high concurrency, but in most practical situations that Thin is used for, one Thin process can only handle 1 concurrent request, so you still need a cluster. More about this peculiar property in section "I/O concurrency models".
  • Puma was also forked from Mongrel, but unlike Unicorn, Puma is designed to be purely multi-threaded. There is therefore currently no builtin cluster support. You need to take special care to ensure that you can utilize multiple cores (More about this in section "I/O concurrency models").
  • Rainbows supports multiple concurrency models through the use of different libraries.

Phusion Passenger works very differently from all the other ones. Phusion Passenger integrates directly into Apache or Nginx, and so can be compared to mod_php for Apache. Just like mod_php allows Apache to serve PHP apps, almost magically, Phusion Passenger allows Apache (and also Nginx!) to serve Ruby apps, almost magically. Phusion Passenger's goal is to make everything Just Work(tm) with as little hassle as possible.

Instead of starting a process or cluster for your app, and configuring Apache/Nginx to serve static files and/or reverse proxying requests to the process/cluster, with Phusion Passenger you only need to:

  • You edit the web server config file and specify the location of your Ruby app's 'public' directory.

All configuration is done within the web server config file. Phusion Passenger automates pretty much everything. There is no need to start a cluster and manage processes. Starting/stopping processes, restarting them when they crash, etc. - all automated. Compared to other app servers, Phusion Passenger has far fewer moving parts. This ease of use is one of the primary reasons why people use Phusion Passenger.

Also unlike other app servers, Phusion Passenger is primarily written in C++, making it very fast.

There's also an Enterprise variant of Phusion Passenger with even more features, such as automated rolling restarts, multithreading support, deployment error resistance, etc.

For the above reasons, Phusion Passenger is currently the most popular Ruby app server, powering over 150,000 websites, including large ones such as New York Times, Pixar, Airbnb, etc.

Phusion Passenger provides a lot more features and provides many advantages over other app servers, such as:

  • Dynamically adjusting the number of processes based on traffic. We run a ton of Rails apps on our resource-constrained server that are not public-facing, and that people in our organization only use at most a few times a day. Things like Gitlab, Redmine, etc. Phusion Passenger can spin down those processes when they're not used, and spin them up when they're used, allowing more resources to be available for more important apps. With other app servers, all your processes are turned on all the time.

Workloads that Unicorn is not good at are:

  • Workloads in which the app performs HTTP API calls.

The hybrid I/O model in Phusion Passenger Enterprise 4 or later makes it an excellent choice for these kinds of workloads.

  • Other app servers require the user to run at least one instance per application. By contrast, Phusion Passenger supports multiple applications in a single instance. This greatly reduces administration overhead.
  • Automatic user switching, a convenient security feature.
  • Phusion Passenger supports MRI Ruby, JRuby and Rubinius. Mongrel, Unicorn and Thin only support MRI. Puma also supports all 3.
  • Phusion Passenger actually supports more than just Ruby! It also supports Python WSGI, so it can for example also run Django and Flask apps. In fact Phusion Passenger is moving into the direction of becoming a polyglot server. Node.js support is on the todo list.
  • Out-of-band garbage collection. Phusion Passenger can run the Ruby garbage collector outside the normal request/response cycle, potentially reducing request times by hundreds of milliseconds. Unicorn also has a similar feature, but Phusion Passenger's version is more flexible because 1) it's not limited to GC and can be used for arbitrary work. 2) Phusion Passenger's version works well with multithreaded apps, while Unicorn's does not.
  • Automated rolling restarts. Rolling restarts on Unicorn and other servers require some scripting work. Phusion Passenger Enterprise completely automates this for you.

There are more features and advantages, but the list is really long. You should refer to the comprehensive Phusion Passenger manual (Apache version, Nginx version) or the Phusion Passenger website for information.

  • Single-threaded multi-process. This is traditionally the most popular I/O model for Ruby app servers, partially because multithreading support in the Ruby ecosystem was very bad. Each process can handle exactly 1 request at a time. The web server load balances between processes. This model is very robust and there is little chance for the programmer to introduce concurrency bugs. However, its I/O concurrency is extremely limited (limited by the number of processes). This model is very suitable for fast, short-running workloads. It is very unsuitable for slow, long-running blocking I/O workloads, e.g. workloads involving the calling of HTTP APIs.
  • Purely multi-threaded. Nowadays the Ruby ecosystem has excellent multithreading support, so this I/O model has become very viable. Multithreading allows high I/O concurrency, making it suitable for both short-running and long-running blocking I/O workloads. The programmer is more likely to introduce concurrency bugs, but luckily most web frameworks are designed in such a way that this is still very unlikely. One thing to note however is that the MRI Ruby interpreter cannot leverage multiple CPU cores even when there are multiple threads, due to the use of the Global Interpreter Lock (GIL). You can work around this by using multiple multi-threaded processes, because each process can leverage a CPU core. JRuby and Rubinius have no GIL, so they can fully leverage multiple cores in a single process.
  • Hybrid multi-threaded multi-process. Primarily implemented by Phusion Passenger Enterprise 4 and later. You can easily switch between single-threaded multi-process, purely multithreaded, or perhaps even multiple processes each with multiple threads. This model gives the best of both worlds.
  • Evented. This model is completely different from the previously mentioned models. It allows very high I/O concurrency and is therefore excellent for long-running blocking I/O workloads. To utilize it, explicit support from the application and the framework is required. However all the major frameworks like Rails and Sinatra do not support evented code. This is why in practice a Thin process still cannot handle more than 1 request at a time, making it effectively behave the same as the single-threaded multi-process model. There are specialized frameworks that can take advantage of evented I/O, such as Cramp.

An article was recently posted on the Phusion blog about optimally tuning the number of processes and threads given your workload. See Tuning Phusion Passenger's concurrency settings.

Capistrano is something completely different. In all the previous sections, "deployment" refers to the act of starting your Ruby app in an application server, so that it becomes accessible to visitors, but before that can happen one typically needs to do some preparation work, such as:

  • Uploading the Ruby app's code and files to the server machine.
  • Setting up or migrating the database.
  • Starting and stopping any daemons that your app might rely on, such as Sidekiq/Resque workers or whatever.

In the context of Capistrano, "deployment" refers to doing all this preparation work. Capistrano is not an application server. Instead, it is a tool for automating all that preparation work. You tell Capistrano where your server is and which commands need to be run every time you deploy a new version of your app, and Capistrano will take care of uploading the Rails app to the server for you and running the commands you specified.

Capistrano is always used in combination with an application server. It does not replace application servers. Vice-versa, application servers do not replace Capistrano, they can be used in combination with Capistrano.

Of course you don't have to use Capistrano. If you prefer to upload your Ruby app with FTP and manually run the same steps of commands every time, then you can do that. Other people got tired of it, so they automate those steps in Capistrano.

You should publish this somewhere. It's all easy now but when I first started with rails it was hard to get any useful info.

Excellent post! Cleared up a lot for me too. You should add some other elements like bundler and rvm and make it a heavy-hitting blog post! :)

This needs to be in the Rails guides.

"Nobody uses WEBrick in production environments." This is not true at all. The default app server when pushing ruby apps to heroku is webrick.

apache - Ruby on Rails Server options - Stack Overflow

ruby-on-rails apache passenger mongrel
Rectangle 27 3

It was mentioned only briefly once, so I'd like to call out the docx4j library, as I've had more success with docx4j than anything else. Apache POI's support for Word documents isn't very good. Also, unlike Aspose.Words, docx4j is an open source library.

The only drawback is with docx4j you have to create Office Open XML (docx) format documents rather than OLE2-based (doc) format documents. This is the default format for Word 2007, but Word 2003 and earlier users will need to install a compatibility pack.

What's a good Java API for creating Word documents? - Stack Overflow

java ms-word docx doc
Rectangle 27 1

Tika uses Apache POI to process Word files (both the old binary- and the newer XML-based flavors).

Since POI (fundamentally) cannot read out those page numbers and Tika is not meant to be a document renderer either, the answer is very simply: No, this is not possible.

Get text from doc/docx file in pages using Apache tika - Stack Overflo...

apache-tika
Rectangle 27 195

Update : 4th August, 2015 :

If you have done a clean installation of Windows 10, you may not have the World Wide Web Publishing Service. In that case, a simple WAMP/XAMPP installation should work fine.

If it doesn't, try installing Visual C++ Redistributable and then re-install WAMP/XAMPP.

I was facing a similar problem with WAMP. In Windows 10 TP, the World Wide Web Publishing Service comes pre-installed. This is related to IIS and you can remove it if you don't need it.

This blocks the port 80 making Apache act weirdly. You can do the following and try again.

  • Go to Start, type in services.msc

This should make port 80 free and restarting WAMP/XAMPP should get you up and running!

What if I don't have "World Wide Web Publishing Service" as a service listed?

The name of the service is W3SVC while the description is World Wide Web Publishing Service. I initially did not think I had the service because I was not seeing the service described when sorting by name.

Yea, I had this problem as well, it kept telling me "A process with a PID of 4 is using port 80" when I checked it, the process name was "System". I kept thinking I could mess up my PC if I tried stopping this process. But this worked perfectly. Thanks

For those out there using the Uniform server Z (which is great!), I can confirm this working for that server too.

php - XAMPP : Couldn't start Apache (Windows 10) - Stack Overflow

php apache xampp
Rectangle 27 195

Update : 4th August, 2015 :

If you have done a clean installation of Windows 10, you may not have the World Wide Web Publishing Service. In that case, a simple WAMP/XAMPP installation should work fine.

If it doesn't, try installing Visual C++ Redistributable and then re-install WAMP/XAMPP.

I was facing a similar problem with WAMP. In Windows 10 TP, the World Wide Web Publishing Service comes pre-installed. This is related to IIS and you can remove it if you don't need it.

This blocks the port 80 making Apache act weirdly. You can do the following and try again.

  • Go to Start, type in services.msc

This should make port 80 free and restarting WAMP/XAMPP should get you up and running!

What if I don't have "World Wide Web Publishing Service" as a service listed?

The name of the service is W3SVC while the description is World Wide Web Publishing Service. I initially did not think I had the service because I was not seeing the service described when sorting by name.

Yea, I had this problem as well, it kept telling me "A process with a PID of 4 is using port 80" when I checked it, the process name was "System". I kept thinking I could mess up my PC if I tried stopping this process. But this worked perfectly. Thanks

For those out there using the Uniform server Z (which is great!), I can confirm this working for that server too.

php - XAMPP : Couldn't start Apache (Windows 10) - Stack Overflow

php apache xampp
Rectangle 27 3

The Apache project has a library called POI which can be used to generate MS Office files. It is a Java library but the advantage is that it can run on Linux with no trouble. This library has its limitations but it may do the job for you, and it's probably simpler to use than trying to run Word.

Another option would be OpenOffice but I can't exactly recommend it since I've never used it.

Create Word Document using PHP in Linux - Stack Overflow

php linux ms-word document
Rectangle 27 3

The Apache project has a library called POI which can be used to generate MS Office files. It is a Java library but the advantage is that it can run on Linux with no trouble. This library has its limitations but it may do the job for you, and it's probably simpler to use than trying to run Word.

Another option would be OpenOffice but I can't exactly recommend it since I've never used it.

Create Word Document using PHP in Linux - Stack Overflow

php linux ms-word document
Rectangle 27 1

No, they do not share anything but the word 'Apache' in their names.

So this means there are 2 threads involved in processing each request 1 for each of them and as far as I understand, httpd thread is blocked till the servlet returns a response. Is this correct?

First of all, it depends on your HTTPd setup, prefork or worker. Tomcat runs a JVM process with spawned threads. One request is handled by a thread on Tomcat side.

Does Apache httpd & Tomcat share same thread pool? - Stack Overflow

apache tomcat mod-jk
Rectangle 27 1

I had to download the tika-app-1.5.jar and execute the following command which returned me all the details i wanted

java -jar tika-app-1.5.jar -m test.docx
java -jar tika-app-1.5.jar -m test.doc
java -jar tika-app-1.5.jar -m test.pptx
java -jar tika-app-1.5.jar -m test.ppt

java - How can i get meta data about word file in command line using a...

java apache-tika
Rectangle 27 1

You are indeed misinterpreting the example: the file sample_lda_data.txt does not contain text (check it), but word count vectors that have already been extracted from a corpus. This is indicated in the text preceding the example:

In the following example, we load word count vectors representing a corpus of documents.

So, you need to get these word count vectors first from your own corpus, before proceeding as you try.

Thanks @desertnaut ! I need to read more about the LDA model. My understanding was I would feed in 'documents' and based on the counts of text in relation to each other it would derive topic probabilities.

apache spark - PySpark LDA Model Dense Vector from RDD - Stack Overflo...

apache-spark machine-learning pyspark apache-spark-mllib lda
Rectangle 27 2

package com.test;

 /**
   * @author Prasanth Pillai
   * @date 01-Feb-2012
   * @description : Below is the test class details
   * 
   * inputs a String from a user. Expect the String to contain spaces and alphanumeric characters only.
   * capitalizes all first letters of the words in the given String.
   * preserves all other characters (including spaces) in the String.
   * displays the result to the user.
   * 
   * Approach : I have followed a simple approach. However there are many string    utilities available 
   * for the same purpose. Example : WordUtils.capitalize(str) (from apache commons-lang)
   *
   */
  import java.io.BufferedReader;
  import java.io.IOException;
  import java.io.InputStreamReader;

  /**
   * Console program that capitalizes the first letter of every word in one line read
   * from standard input and prints the result.
   *
   * <p>A word is a maximal run of characters for which {@link Character#isLetter(char)}
   * is true. Every other character (spaces, digits, punctuation) is copied unchanged and
   * ends the current word, so the next letter is capitalized again.
   */
  public class Test {

      /**
       * Prompts for a line, capitalizes the first letter of each word in it and prints the
       * result prefixed with {@code "new String ->"}.
       *
       * @param args ignored
       * @throws IOException if reading from standard input fails
       */
      public static void main(String[] args) throws IOException {
          System.out.println("Input String :\n");
          BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
          String inputString = in.readLine();
          // readLine() returns null at end of stream (empty or closed stdin); treat that
          // as an empty line instead of failing with a NullPointerException.
          if (inputString == null) {
              inputString = "";
          }
          System.out.println("new String ->" + capitalizeWords(inputString));
      }

      /** Returns {@code text} with the first letter of every run of letters upper-cased. */
      private static String capitalizeWords(String text) {
          StringBuilder result = new StringBuilder(text.length());
          // True while the next letter encountered would be the first letter of a word.
          boolean atWordStart = true;
          for (int i = 0; i < text.length(); i++) {
              char c = text.charAt(i);
              if (Character.isLetter(c)) {
                  result.append(atWordStart ? Character.toUpperCase(c) : c);
                  atWordStart = false;
              } else {
                  // Any non-letter is kept as is and terminates the current word.
                  result.append(c);
                  atWordStart = true;
              }
          }
          return result.toString();
      }
  }

java - How to capitalize the first character of each word in a string ...

java string capitalization
Rectangle 27 2

package com.test;

 /**
   * @author Prasanth Pillai
   * @date 01-Feb-2012
   * @description : Below is the test class details
   * 
   * inputs a String from a user. Expect the String to contain spaces and alphanumeric characters only.
   * capitalizes all first letters of the words in the given String.
   * preserves all other characters (including spaces) in the String.
   * displays the result to the user.
   * 
   * Approach : I have followed a simple approach. However there are many string    utilities available 
   * for the same purpose. Example : WordUtils.capitalize(str) (from apache commons-lang)
   *
   */
  import java.io.BufferedReader;
  import java.io.IOException;
  import java.io.InputStreamReader;

  /**
   * Reads a single line from standard input, upper-cases the first letter of each word
   * and echoes the transformed line.
   *
   * <p>Word boundaries are determined solely by {@link Character#isLetter(char)}: a
   * letter that follows a non-letter (or starts the line) begins a new word. Non-letter
   * characters are preserved exactly.
   */
  public class Test {

      /**
       * Program entry point: prompt, read one line, print the capitalized line after the
       * {@code "new String ->"} prefix.
       *
       * @param args ignored
       * @throws IOException if standard input cannot be read
       */
      public static void main(String[] args) throws IOException {
          System.out.println("Input String :\n");
          InputStreamReader converter = new InputStreamReader(System.in);
          BufferedReader in = new BufferedReader(converter);
          String line = in.readLine();
          // A null return means end of stream was reached before any line was read;
          // fall back to an empty line so the program does not crash with an NPE.
          String inputString = (line != null) ? line : "";
          System.out.println("new String ->" + capitalizeFirstLetters(inputString));
      }

      /** Upper-cases each letter that is not immediately preceded by another letter. */
      private static String capitalizeFirstLetters(String input) {
          StringBuilder out = new StringBuilder(input.length());
          boolean previousWasLetter = false;
          for (char ch : input.toCharArray()) {
              boolean letter = Character.isLetter(ch);
              if (letter && !previousWasLetter) {
                  // First letter of a word.
                  out.append(Character.toUpperCase(ch));
              } else {
                  // Remaining letters of a word and all non-letters are copied verbatim.
                  out.append(ch);
              }
              previousWasLetter = letter;
          }
          return out.toString();
      }
  }

java - How to capitalize the first character of each word in a string ...

java string uppercase lowercase capitalization